From 2dd30877885081de966b5d85edc3da31f55610ed Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 08:33:55 +0100 Subject: [PATCH 01/23] Build: Bump com.google.errorprone:error_prone_annotations (#9972) Bumps [com.google.errorprone:error_prone_annotations](https://github.com/google/error-prone) from 2.24.1 to 2.26.1. - [Release notes](https://github.com/google/error-prone/releases) - [Commits](https://github.com/google/error-prone/compare/v2.24.1...v2.26.1) --- updated-dependencies: - dependency-name: com.google.errorprone:error_prone_annotations dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- gradle/libs.versions.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 867c87374e8e..3b7a8e08fab1 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -36,7 +36,7 @@ calcite = "1.10.0" delta-standalone = "3.1.0" delta-spark = "3.1.0" esotericsoftware-kryo = "4.0.2" -errorprone-annotations = "2.24.1" +errorprone-annotations = "2.26.1" findbugs-jsr305 = "3.0.2" flink116 = { strictly = "1.16.3"} flink117 = { strictly = "1.17.2"} From 54246a06a37422a066dec4596c3a0aa888d1f1c1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 08:35:27 +0100 Subject: [PATCH 02/23] Build: Bump org.awaitility:awaitility from 4.2.0 to 4.2.1 (#9970) Bumps [org.awaitility:awaitility](https://github.com/awaitility/awaitility) from 4.2.0 to 4.2.1. 
- [Changelog](https://github.com/awaitility/awaitility/blob/master/changelog.txt) - [Commits](https://github.com/awaitility/awaitility/compare/awaitility-4.2.0...awaitility-4.2.1) --- updated-dependencies: - dependency-name: org.awaitility:awaitility dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- gradle/libs.versions.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 3b7a8e08fab1..f8d912afb4b4 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -27,7 +27,7 @@ aircompressor = "0.26" arrow = "15.0.0" avro = "1.11.3" assertj-core = "3.25.3" -awaitility = "4.2.0" +awaitility = "4.2.1" awssdk-bom = "2.24.5" azuresdk-bom = "1.2.20" awssdk-s3accessgrants = "2.0.0" From 1c5022785b84ed46d921ab44ca9c03eb02790b7b Mon Sep 17 00:00:00 2001 From: Alex Merced Date: Mon, 18 Mar 2024 03:37:45 -0400 Subject: [PATCH 03/23] Docs: Add 13 Dremio Blogs + Fix a few incorrect dates (#9967) --- site/docs/blogs.md | 68 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/site/docs/blogs.md b/site/docs/blogs.md index 24e472c19500..746eef97d663 100644 --- a/site/docs/blogs.md +++ b/site/docs/blogs.md @@ -22,6 +22,41 @@ title: "Blogs" Here is a list of company blogs that talk about Iceberg. The blogs are ordered from most recent to oldest. 
+### [The Apache Iceberg Lakehouse: The Great Data Equalizer](https://amdatalakehouse.substack.com/p/the-apache-iceberg-lakehouse-the) +**Date**: March 6th, 2024, **Company**: Dremio + +**Author**: [Alex Merced](https://www.linkedin.com/in/alexmerced/) + +### [Data Lakehouse Versioning Comparison: (Nessie, Apache Iceberg, LakeFS)](https://www.dremio.com/blog/data-lakehouse-versioning-comparison-nessie-apache-iceberg-lakefs/) +**Date**: March 5th, 2024, **Company**: Dremio + +**Author**: [Alex Merced](https://www.linkedin.com/in/alexmerced/) + +### [What is Lakehouse Management?: Git-for-Data, Automated Apache Iceberg Table Maintenance and more](https://www.dremio.com/blog/what-is-lakehouse-management-git-for-data-automated-apache-iceberg-table-maintenance-and-more/) +**Date**: February 23rd, 2024, **Company**: Dremio + +**Author**: [Alex Merced](https://www.linkedin.com/in/alexmerced/) + +### [What is DataOps? Automating Data Management on the Apache Iceberg Lakehouse](https://www.dremio.com/blog/what-is-dataops-automating-data-management-on-the-apache-iceberg-lakehouse/) +**Date**: February 23rd, 2024, **Company**: Dremio + +**Author**: [Alex Merced](https://www.linkedin.com/in/alexmerced/) + +### [What is the Data Lakehouse and the Role of Apache Iceberg, Nessie and Dremio?](https://amdatalakehouse.substack.com/p/the-apache-iceberg-lakehouse-the) +**Date**: February 21st, 2024, **Company**: Dremio + +**Author**: [Alex Merced](https://www.linkedin.com/in/alexmerced/) + +### [Ingesting Data Into Apache Iceberg Tables with Dremio: A Unified Path to Iceberg](https://www.dremio.com/blog/ingesting-data-into-apache-iceberg-tables-with-dremio-a-unified-path-to-iceberg/) +**Date**: February 1st, 2024, **Company**: Dremio + +**Author**: [Alex Merced](https://www.linkedin.com/in/alexmerced/) + +### [Open Source and the Data Lakehouse: Apache Arrow, Apache Iceberg, Nessie and 
Dremio](https://www.dremio.com/blog/open-source-and-the-data-lakehouse-apache-arrow-apache-iceberg-nessie-and-dremio/) +**Date**: February 1st, 2024, **Company**: Dremio + +**Author**: [Alex Merced](https://www.linkedin.com/in/alexmerced/) + ### [How not to use Apache Iceberg](https://medium.com/@ajanthabhat/how-not-to-use-apache-iceberg-046ae7e7c884) **Date**: January 23rd, 2024, **Company**: Dremio @@ -37,6 +72,16 @@ Here is a list of company blogs that talk about Iceberg. The blogs are ordered f **Authors**: [Ayush Saxena](https://www.linkedin.com/in/ayush151/) +### [Getting Started with Flink SQL and Apache Iceberg](https://www.dremio.com/blog/getting-started-with-flink-sql-and-apache-iceberg/) +**Date**: August 8th, 2023, **Company**: Dremio + +**Authors**: [Dipankar Mazumdar](https://www.linkedin.com/in/dipankar-mazumdar/) & [Ajantha Bhat](https://www.linkedin.com/in/ajanthabhat/) + +### [Using Flink with Apache Iceberg and Nessie](https://www.dremio.com/blog/using-flink-with-apache-iceberg-and-nessie/) +**Date**: July 28th, 2023, **Company**: Dremio + +**Author**: [Alex Merced](https://www.linkedin.com/in/alexmerced/) + ### [From Hive Tables to Iceberg Tables: Hassle-Free](https://blog.cloudera.com/from-hive-tables-to-iceberg-tables-hassle-free/) **Date**: July 14th, 2023, **Company**: Cloudera @@ -57,23 +102,38 @@ Here is a list of company blogs that talk about Iceberg. 
The blogs are ordered f **Authors**: [Rui Li](https://www.linkedin.com/in/rui-li-19282979/) +### [How to Convert JSON Files Into an Apache Iceberg Table with Dremio](https://www.dremio.com/blog/how-to-convert-json-files-into-an-apache-iceberg-table-with-dremio/) +**Date**: May 31st, 2023, **Company**: Dremio + +**Author**: [Alex Merced](https://www.linkedin.com/in/alexmerced/) + +### [Deep Dive Into Configuring Your Apache Iceberg Catalog with Apache Spark](https://www.dremio.com/blog/deep-dive-into-configuring-your-apache-iceberg-catalog-with-apache-spark/) +**Date**: May 31st, 2023, **Company**: Dremio + +**Author**: [Alex Merced](https://www.linkedin.com/in/alexmerced/) + +### [Streamlining Data Quality in Apache Iceberg with write-audit-publish & branching](https://www.dremio.com/blog/streamlining-data-quality-in-apache-iceberg-with-write-audit-publish-branching/) +**Date**: May 19th, 2023, **Company**: Dremio + +**Authors**: [Dipankar Mazumdar](https://www.linkedin.com/in/dipankar-mazumdar/) & [Ajantha Bhat](https://www.linkedin.com/in/ajanthabhat/) + ### [Introducing the Apache Iceberg Catalog Migration Tool](https://www.dremio.com/blog/introducing-the-apache-iceberg-catalog-migration-tool/) -**Date**: May 12th, 2022, **Company**: Dremio +**Date**: May 12th, 2023, **Company**: Dremio **Authors**: [Dipankar Mazumdar](https://www.linkedin.com/in/dipankar-mazumdar/) & [Ajantha Bhat](https://www.linkedin.com/in/ajanthabhat/) ### [3 Ways to Use Python with Apache Iceberg](https://www.dremio.com/blog/3-ways-to-use-python-with-apache-iceberg/) -**Date**: April 12th, 2022, **Company**: Dremio +**Date**: April 12th, 2023, **Company**: Dremio **Author**: [Alex Merced](https://www.linkedin.com/in/alexmerced/) ### [3 Ways to Convert a Delta Lake Table Into an Apache Iceberg Table](https://www.dremio.com/blog/3-ways-to-convert-a-delta-lake-table-into-an-apache-iceberg-table/) -**Date**: April 3rd, 2022, **Company**: Dremio +**Date**: April 3rd, 2023, **Company**: Dremio 
**Author**: [Alex Merced](https://www.linkedin.com/in/alexmerced/) ### [How to Convert CSV Files into an Apache Iceberg table with Dremio](https://www.dremio.com/blog/how-to-convert-csv-files-into-an-apache-iceberg-table-with-dremio/) -**Date**: April 3rd, 2022, **Company**: Dremio +**Date**: April 3rd, 2023, **Company**: Dremio **Author**: [Alex Merced](https://www.linkedin.com/in/alexmerced/) From 20bd4ca8cb2fdb8e2bb51bb41684d4bf7260d474 Mon Sep 17 00:00:00 2001 From: Manu Zhang Date: Mon, 18 Mar 2024 15:39:05 +0800 Subject: [PATCH 04/23] Build: Fix ignoring major version update in dependabot (#9981) I got the config wrong in the previous attempt #9806. This PR fixes it following the [official example](https://github.blog/changelog/2021-05-21-dependabot-version-updates-can-now-ignore-major-minor-patch-releases/) --- .github/dependabot.yml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index ab81fe8de75f..51a34758fe09 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -24,19 +24,24 @@ updates: schedule: interval: "weekly" day: "sunday" + ignore: + - dependency-name: "*" + update-types: ["version-update:semver-major"] - package-ecosystem: "gradle" directory: "/" schedule: interval: "weekly" day: "sunday" open-pull-requests-limit: 50 + ignore: + - dependency-name: "*" + update-types: ["version-update:semver-major"] - package-ecosystem: "pip" directory: "/" schedule: interval: "weekly" day: "sunday" open-pull-requests-limit: 5 - - ignore: - dependency-name: "*" - update-types: ["version-update:semver-major"] - + ignore: + - dependency-name: "*" + update-types: ["version-update:semver-major"] From 82137b9e7cf6ee243eb9895bc44c6b5d572e5283 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 08:40:22 +0100 Subject: [PATCH 05/23] Build: Bump nessie from 0.77.1 to 0.79.0 (#9976) Bumps `nessie` from 0.77.1 
to 0.79.0. Updates `org.projectnessie.nessie:nessie-client` from 0.77.1 to 0.79.0 Updates `org.projectnessie.nessie:nessie-jaxrs-testextension` from 0.77.1 to 0.79.0 Updates `org.projectnessie.nessie:nessie-versioned-storage-inmemory-tests` from 0.77.1 to 0.79.0 Updates `org.projectnessie.nessie:nessie-versioned-storage-testextension` from 0.77.1 to 0.79.0 --- updated-dependencies: - dependency-name: org.projectnessie.nessie:nessie-client dependency-type: direct:production update-type: version-update:semver-minor - dependency-name: org.projectnessie.nessie:nessie-jaxrs-testextension dependency-type: direct:production update-type: version-update:semver-minor - dependency-name: org.projectnessie.nessie:nessie-versioned-storage-inmemory-tests dependency-type: direct:production update-type: version-update:semver-minor - dependency-name: org.projectnessie.nessie:nessie-versioned-storage-testextension dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- gradle/libs.versions.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index f8d912afb4b4..26d0a79e57b0 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -65,7 +65,7 @@ kryo-shaded = "4.0.3" microprofile-openapi-api = "3.1.1" mockito = "4.11.0" mockserver = "5.15.0" -nessie = "0.77.1" +nessie = "0.79.0" netty-buffer = "4.1.107.Final" netty-buffer-compat = "4.1.107.Final" object-client-bundle = "3.3.2" From b0a4a907fd8e27ed48ac936f6b8e9f7dce49f9d0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 08:40:39 +0100 Subject: [PATCH 06/23] Build: Bump datamodel-code-generator from 0.25.4 to 0.25.5 (#9979) Bumps [datamodel-code-generator](https://github.com/koxudaxi/datamodel-code-generator) from 0.25.4 to 0.25.5. 
- [Release notes](https://github.com/koxudaxi/datamodel-code-generator/releases) - [Commits](https://github.com/koxudaxi/datamodel-code-generator/compare/0.25.4...0.25.5) --- updated-dependencies: - dependency-name: datamodel-code-generator dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- open-api/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/open-api/requirements.txt b/open-api/requirements.txt index 4ffabfde3e07..5b819b796841 100644 --- a/open-api/requirements.txt +++ b/open-api/requirements.txt @@ -16,4 +16,4 @@ # under the License. openapi-spec-validator==0.7.1 -datamodel-code-generator==0.25.4 +datamodel-code-generator==0.25.5 From 1bc5c7c35d5e921dfc08ab4ce4cfd0404d7a24b8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 08:41:20 +0100 Subject: [PATCH 07/23] Build: Bump mkdocs-material from 9.5.9 to 9.5.14 (#9983) Bumps [mkdocs-material](https://github.com/squidfunk/mkdocs-material) from 9.5.9 to 9.5.14. - [Release notes](https://github.com/squidfunk/mkdocs-material/releases) - [Changelog](https://github.com/squidfunk/mkdocs-material/blob/master/CHANGELOG) - [Commits](https://github.com/squidfunk/mkdocs-material/compare/9.5.9...9.5.14) --- updated-dependencies: - dependency-name: mkdocs-material dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- site/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/requirements.txt b/site/requirements.txt index 64fa41cab2af..e1efc7cbb2f9 100644 --- a/site/requirements.txt +++ b/site/requirements.txt @@ -17,7 +17,7 @@ mkdocs-awesome-pages-plugin==2.9.2 mkdocs-macros-plugin==1.0.5 -mkdocs-material==9.5.9 +mkdocs-material==9.5.14 mkdocs-material-extensions==1.3.1 mkdocs-monorepo-plugin @ git+https://github.com/bitsondatadev/mkdocs-monorepo-plugin@url-fix mkdocs-redirects==1.2.1 From f79fb3fc188a623dfb989bd3e47ade59a7cbf74e Mon Sep 17 00:00:00 2001 From: Tom Tanaka <43331405+tomtongue@users.noreply.github.com> Date: Mon, 18 Mar 2024 17:00:42 +0900 Subject: [PATCH 08/23] Core: Migrate tests to JUnit5 (#9964) --- .../apache/iceberg/TestFormatVersions.java | 38 ++-- .../apache/iceberg/TestLocationProvider.java | 138 ++++++------- .../apache/iceberg/TestManifestCaching.java | 68 ++++--- .../apache/iceberg/TestManifestCleanup.java | 89 ++++----- .../iceberg/TestManifestEncryption.java | 65 +++--- .../iceberg/TestManifestListVersions.java | 185 ++++++++--------- .../iceberg/TestManifestReaderStats.java | 146 +++++++------- .../apache/iceberg/TestManifestWriter.java | 187 ++++++++---------- .../iceberg/TestManifestWriterVersions.java | 123 ++++++------ 9 files changed, 479 insertions(+), 560 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/TestFormatVersions.java b/core/src/test/java/org/apache/iceberg/TestFormatVersions.java index b4f80088d2cc..2f6a01c6e603 100644 --- a/core/src/test/java/org/apache/iceberg/TestFormatVersions.java +++ b/core/src/test/java/org/apache/iceberg/TestFormatVersions.java @@ -18,50 +18,54 @@ */ package org.apache.iceberg; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Test; +import static org.assertj.core.api.Assertions.assertThat; +import static 
org.assertj.core.api.Assertions.assertThatThrownBy; -public class TestFormatVersions extends TableTestBase { - public TestFormatVersions() { - super(1); +import java.util.Arrays; +import java.util.List; +import org.junit.jupiter.api.TestTemplate; + +public class TestFormatVersions extends TestBase { + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(1); } - @Test + @TestTemplate public void testDefaultFormatVersion() { - Assert.assertEquals("Should default to v1", 1, table.ops().current().formatVersion()); + assertThat(table.ops().current().formatVersion()).isEqualTo(1); } - @Test + @TestTemplate public void testFormatVersionUpgrade() { TableOperations ops = table.ops(); TableMetadata base = ops.current(); ops.commit(base, base.upgradeToFormatVersion(2)); - Assert.assertEquals("Should report v2", 2, ops.current().formatVersion()); + assertThat(ops.current().formatVersion()).isEqualTo(2); } - @Test + @TestTemplate public void testFormatVersionDowngrade() { TableOperations ops = table.ops(); TableMetadata base = ops.current(); ops.commit(base, base.upgradeToFormatVersion(2)); - Assert.assertEquals("Should report v2", 2, ops.current().formatVersion()); + assertThat(ops.current().formatVersion()).isEqualTo(2); - Assertions.assertThatThrownBy(() -> ops.current().upgradeToFormatVersion(1)) + assertThatThrownBy(() -> ops.current().upgradeToFormatVersion(1)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot downgrade v2 table to v1"); - Assert.assertEquals("Should report v2", 2, ops.current().formatVersion()); + assertThat(ops.current().formatVersion()).isEqualTo(2); } - @Test + @TestTemplate public void testFormatVersionUpgradeNotSupported() { TableOperations ops = table.ops(); TableMetadata base = ops.current(); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> ops.commit( base, @@ -69,6 +73,6 @@ public void testFormatVersionUpgradeNotSupported() { 
.isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot upgrade table to unsupported format version: v3 (supported: v2)"); - Assert.assertEquals("Should report v1", 1, ops.current().formatVersion()); + assertThat(ops.current().formatVersion()).isEqualTo(1); } } diff --git a/core/src/test/java/org/apache/iceberg/TestLocationProvider.java b/core/src/test/java/org/apache/iceberg/TestLocationProvider.java index 6afc7f0fe715..1b9c6581200a 100644 --- a/core/src/test/java/org/apache/iceberg/TestLocationProvider.java +++ b/core/src/test/java/org/apache/iceberg/TestLocationProvider.java @@ -18,27 +18,22 @@ */ package org.apache.iceberg; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.util.Arrays; import java.util.List; import java.util.Map; import org.apache.iceberg.io.LocationProvider; import org.apache.iceberg.relocated.com.google.common.base.Splitter; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class TestLocationProvider extends TableTestBase { - @Parameterized.Parameters - public static Object[][] parameters() { - return new Object[][] { - new Object[] {1}, new Object[] {2}, - }; - } - - public TestLocationProvider(int formatVersion) { - super(formatVersion); +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(ParameterizedTestExtension.class) +public class TestLocationProvider extends TestBase { + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(1, 2); } // publicly visible for testing to be dynamically loaded @@ -99,29 +94,25 @@ public static class InvalidNoInterfaceDynamicallyLoadedLocationProvider { // Default no-arg constructor is present, but does not impelemnt interface LocationProvider } - 
@Test + @TestTemplate public void testDefaultLocationProvider() { this.table.updateProperties().commit(); this.table.locationProvider().newDataLocation("my_file"); - Assert.assertEquals( - "Default data path should have table location as root", - String.format("%s/data/%s", this.table.location(), "my_file"), - this.table.locationProvider().newDataLocation("my_file")); + assertThat(this.table.locationProvider().newDataLocation("my_file")) + .isEqualTo(String.format("%s/data/%s", this.table.location(), "my_file")); } - @Test + @TestTemplate public void testDefaultLocationProviderWithCustomDataLocation() { this.table.updateProperties().set(TableProperties.WRITE_DATA_LOCATION, "new_location").commit(); this.table.locationProvider().newDataLocation("my_file"); - Assert.assertEquals( - "Default location provider should allow custom path location", - "new_location/my_file", - this.table.locationProvider().newDataLocation("my_file")); + assertThat(this.table.locationProvider().newDataLocation("my_file")) + .isEqualTo("new_location/my_file"); } - @Test + @TestTemplate public void testNoArgDynamicallyLoadedLocationProvider() { String invalidImpl = String.format( @@ -133,13 +124,11 @@ public void testNoArgDynamicallyLoadedLocationProvider() { .set(TableProperties.WRITE_LOCATION_PROVIDER_IMPL, invalidImpl) .commit(); - Assert.assertEquals( - "Custom provider should take base table location", - "test_no_arg_provider/my_file", - this.table.locationProvider().newDataLocation("my_file")); + assertThat(this.table.locationProvider().newDataLocation("my_file")) + .isEqualTo("test_no_arg_provider/my_file"); } - @Test + @TestTemplate public void testTwoArgDynamicallyLoadedLocationProvider() { this.table .updateProperties() @@ -151,17 +140,15 @@ public void testTwoArgDynamicallyLoadedLocationProvider() { TwoArgDynamicallyLoadedLocationProvider.class.getSimpleName())) .commit(); - Assert.assertTrue( - String.format("Table should load impl defined in its properties"), - 
this.table.locationProvider() instanceof TwoArgDynamicallyLoadedLocationProvider); + assertThat(this.table.locationProvider()) + .as("Table should load impl defined in its properties") + .isInstanceOf(TwoArgDynamicallyLoadedLocationProvider.class); - Assert.assertEquals( - "Custom provider should take base table location", - String.format("%s/test_custom_provider/%s", this.table.location(), "my_file"), - this.table.locationProvider().newDataLocation("my_file")); + assertThat(this.table.locationProvider().newDataLocation("my_file")) + .isEqualTo(String.format("%s/test_custom_provider/%s", this.table.location(), "my_file")); } - @Test + @TestTemplate public void testDynamicallyLoadedLocationProviderNotFound() { String nonExistentImpl = String.format( @@ -173,7 +160,7 @@ public void testDynamicallyLoadedLocationProviderNotFound() { .set(TableProperties.WRITE_LOCATION_PROVIDER_IMPL, nonExistentImpl) .commit(); - Assertions.assertThatThrownBy(() -> table.locationProvider()) + assertThatThrownBy(() -> table.locationProvider()) .isInstanceOf(IllegalArgumentException.class) .hasMessageStartingWith( String.format( @@ -185,7 +172,7 @@ public void testDynamicallyLoadedLocationProviderNotFound() { + "taking in the string base table location and its property string map."); } - @Test + @TestTemplate public void testInvalidNoInterfaceDynamicallyLoadedLocationProvider() { String invalidImpl = String.format( @@ -197,7 +184,7 @@ public void testInvalidNoInterfaceDynamicallyLoadedLocationProvider() { .set(TableProperties.WRITE_LOCATION_PROVIDER_IMPL, invalidImpl) .commit(); - Assertions.assertThatThrownBy(() -> table.locationProvider()) + assertThatThrownBy(() -> table.locationProvider()) .isInstanceOf(IllegalArgumentException.class) .hasMessage( String.format( @@ -205,7 +192,7 @@ public void testInvalidNoInterfaceDynamicallyLoadedLocationProvider() { LocationProvider.class)); } - @Test + @TestTemplate public void testInvalidArgTypesDynamicallyLoadedLocationProvider() { String 
invalidImpl = String.format( @@ -217,7 +204,7 @@ public void testInvalidArgTypesDynamicallyLoadedLocationProvider() { .set(TableProperties.WRITE_LOCATION_PROVIDER_IMPL, invalidImpl) .commit(); - Assertions.assertThatThrownBy(() -> table.locationProvider()) + assertThatThrownBy(() -> table.locationProvider()) .isInstanceOf(IllegalArgumentException.class) .hasMessageStartingWith( String.format( @@ -225,13 +212,13 @@ public void testInvalidArgTypesDynamicallyLoadedLocationProvider() { invalidImpl, LocationProvider.class)); } - @Test + @TestTemplate public void testObjectStorageLocationProviderPathResolution() { table.updateProperties().set(TableProperties.OBJECT_STORE_ENABLED, "true").commit(); - Assert.assertTrue( - "default data location should be used when object storage path not set", - table.locationProvider().newDataLocation("file").contains(table.location() + "/data")); + assertThat(table.locationProvider().newDataLocation("file")) + .as("default data location should be used when object storage path not set") + .contains(table.location() + "/data"); String folderPath = "s3://random/folder/location"; table @@ -239,32 +226,32 @@ public void testObjectStorageLocationProviderPathResolution() { .set(TableProperties.WRITE_FOLDER_STORAGE_LOCATION, folderPath) .commit(); - Assert.assertTrue( - "folder storage path should be used when set", - table.locationProvider().newDataLocation("file").contains(folderPath)); + assertThat(table.locationProvider().newDataLocation("file")) + .as("folder storage path should be used when set") + .contains(folderPath); String objectPath = "s3://random/object/location"; table.updateProperties().set(TableProperties.OBJECT_STORE_PATH, objectPath).commit(); - Assert.assertTrue( - "object storage path should be used when set", - table.locationProvider().newDataLocation("file").contains(objectPath)); + assertThat(table.locationProvider().newDataLocation("file")) + .as("object storage path should be used when set") + .contains(objectPath); 
String dataPath = "s3://random/data/location"; table.updateProperties().set(TableProperties.WRITE_DATA_LOCATION, dataPath).commit(); - Assert.assertTrue( - "write data path should be used when set", - table.locationProvider().newDataLocation("file").contains(dataPath)); + assertThat(table.locationProvider().newDataLocation("file")) + .as("write data path should be used when set") + .contains(dataPath); } - @Test + @TestTemplate public void testDefaultStorageLocationProviderPathResolution() { table.updateProperties().set(TableProperties.OBJECT_STORE_ENABLED, "false").commit(); - Assert.assertTrue( - "default data location should be used when object storage path not set", - table.locationProvider().newDataLocation("file").contains(table.location() + "/data")); + assertThat(table.locationProvider().newDataLocation("file")) + .as("default data location should be used when object storage path not set") + .contains(table.location() + "/data"); String folderPath = "s3://random/folder/location"; table @@ -272,19 +259,19 @@ public void testDefaultStorageLocationProviderPathResolution() { .set(TableProperties.WRITE_FOLDER_STORAGE_LOCATION, folderPath) .commit(); - Assert.assertTrue( - "folder storage path should be used when set", - table.locationProvider().newDataLocation("file").contains(folderPath)); + assertThat(table.locationProvider().newDataLocation("file")) + .as("folder storage path should be used when set") + .contains(folderPath); String dataPath = "s3://random/data/location"; table.updateProperties().set(TableProperties.WRITE_DATA_LOCATION, dataPath).commit(); - Assert.assertTrue( - "write data path should be used when set", - table.locationProvider().newDataLocation("file").contains(dataPath)); + assertThat(table.locationProvider().newDataLocation("file")) + .as("write data path should be used when set") + .contains(dataPath); } - @Test + @TestTemplate public void testObjectStorageWithinTableLocation() { 
table.updateProperties().set(TableProperties.OBJECT_STORE_ENABLED, "true").commit(); @@ -292,11 +279,10 @@ public void testObjectStorageWithinTableLocation() { String relativeLocation = fileLocation.replaceFirst(table.location(), ""); List parts = Splitter.on("/").splitToList(relativeLocation); - Assert.assertEquals("Should contain 4 parts", 4, parts.size()); - Assert.assertTrue("First part should be empty", parts.get(0).isEmpty()); - Assert.assertEquals("Second part should be data", "data", parts.get(1)); - Assert.assertFalse("Third part should be a hash value", parts.get(2).isEmpty()); - Assert.assertEquals( - "Fourth part should be the file name passed in", "test.parquet", parts.get(3)); + assertThat(parts).hasSize(4); + assertThat(parts).first().asString().isEmpty(); + assertThat(parts).element(1).asString().isEqualTo("data"); + assertThat(parts).element(2).asString().isNotEmpty(); + assertThat(parts).element(3).asString().isEqualTo("test.parquet"); } } diff --git a/core/src/test/java/org/apache/iceberg/TestManifestCaching.java b/core/src/test/java/org/apache/iceberg/TestManifestCaching.java index 3b67cb3e695e..4469dece2f25 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestCaching.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestCaching.java @@ -19,10 +19,13 @@ package org.apache.iceberg; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; import com.github.benmanes.caffeine.cache.Cache; import com.google.common.testing.GcFinalization; import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.List; import java.util.Map; import java.util.UUID; @@ -35,13 +38,10 @@ import org.apache.iceberg.io.ContentCache; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; -import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import 
org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Types; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestManifestCaching { @@ -54,7 +54,7 @@ public class TestManifestCaching { // Partition spec used to create tables static final PartitionSpec SPEC = PartitionSpec.builderFor(SCHEMA).bucket("data", 16).build(); - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; @Test public void testPlanWithCache() throws Exception { @@ -66,7 +66,7 @@ public void testPlanWithCache() throws Exception { "true"); Table table = createTable(properties); ContentCache cache = ManifestFiles.contentCache(table.io()); - Assert.assertEquals(0, cache.estimatedCacheSize()); + assertThat(cache.estimatedCacheSize()).isEqualTo(0); int numFiles = 4; List files16Mb = newFiles(numFiles, 16 * 1024 * 1024); @@ -75,20 +75,22 @@ public void testPlanWithCache() throws Exception { // planTask with SPLIT_SIZE half of the file size TableScan scan1 = table.newScan().option(TableProperties.SPLIT_SIZE, String.valueOf(8 * 1024 * 1024)); - Assert.assertEquals( - "Should get 2 tasks per file", numFiles * 2, Iterables.size(scan1.planTasks())); - Assert.assertEquals( - "All manifest files should be cached", numFiles, cache.estimatedCacheSize()); - Assert.assertEquals( - "All manifest files should be recently loaded", numFiles, cache.stats().loadCount()); + assertThat(scan1.planTasks()).hasSize(numFiles * 2); + assertThat(cache.estimatedCacheSize()) + .as("All manifest files should be cached") + .isEqualTo(numFiles); + assertThat(cache.stats().loadCount()) + .as("All manifest files should be recently loaded") + .isEqualTo(numFiles); long missCount = cache.stats().missCount(); // planFiles and verify that cache size still the same TableScan scan2 = table.newScan(); - 
Assert.assertEquals("Should get 1 tasks per file", numFiles, Iterables.size(scan2.planFiles())); - Assert.assertEquals("Cache size should remain the same", numFiles, cache.estimatedCacheSize()); - Assert.assertEquals( - "All manifest file reads should hit cache", missCount, cache.stats().missCount()); + assertThat(scan2.planFiles()).hasSize(numFiles); + assertThat(cache.estimatedCacheSize()).isEqualTo(numFiles); + assertThat(cache.stats().missCount()) + .as("All manifest file reads should hit cache") + .isEqualTo(missCount); ManifestFiles.dropCache(table.io()); } @@ -110,12 +112,14 @@ public void testPlanWithSmallCache() throws Exception { // We should never hit cache. TableScan scan = table.newScan(); ContentCache cache = ManifestFiles.contentCache(scan.table().io()); - Assert.assertEquals(1, cache.maxContentLength()); - Assert.assertEquals(1, cache.maxTotalBytes()); - Assert.assertEquals("Should get 1 tasks per file", numFiles, Iterables.size(scan.planFiles())); - Assert.assertEquals("Cache should be empty", 0, cache.estimatedCacheSize()); - Assert.assertEquals("File should not be loaded through cache", 0, cache.stats().loadCount()); - Assert.assertEquals("Cache should not serve file", 0, cache.stats().requestCount()); + assertThat(cache.maxContentLength()).isEqualTo(1); + assertThat(cache.maxTotalBytes()).isEqualTo(1); + assertThat(scan.planFiles()).hasSize(numFiles); + assertThat(cache.estimatedCacheSize()).isEqualTo(0); + assertThat(cache.stats().loadCount()) + .as("File should not be loaded through cache") + .isEqualTo(0); + assertThat(cache.stats().requestCount()).as("Cache should not serve file").isEqualTo(0); ManifestFiles.dropCache(scan.table().io()); } @@ -140,8 +144,8 @@ public void testUniqueCache() throws Exception { ContentCache cache1 = ManifestFiles.contentCache(table1.io()); ContentCache cache2 = ManifestFiles.contentCache(table2.io()); ContentCache cache3 = ManifestFiles.contentCache(table2.io()); - Assert.assertNotSame(cache1, cache2); - 
Assert.assertSame(cache2, cache3); + assertThat(cache2).isNotSameAs(cache1); + assertThat(cache3).isSameAs(cache2); ManifestFiles.dropCache(table1.io()); ManifestFiles.dropCache(table2.io()); @@ -161,7 +165,7 @@ public void testRecreateCache() throws Exception { ManifestFiles.dropCache(table.io()); ContentCache cache2 = ManifestFiles.contentCache(table.io()); - Assert.assertNotSame(cache1, cache2); + assertThat(cache2).isNotSameAs(cache1); ManifestFiles.dropCache(table.io()); } @@ -193,10 +197,10 @@ public void testWeakFileIOReferenceCleanUp() { // Verify that manifestCache evicts all FileIO except the firstIO and lastIO. ContentCache cache1 = contentCache(manifestCache, firstIO); ContentCache cacheN = contentCache(manifestCache, lastIO); - Assert.assertSame(firstCache, cache1); - Assert.assertSame(lastCache, cacheN); - Assert.assertEquals(maxIO, manifestCache.stats().loadCount()); - Assert.assertEquals(maxIO - 2, manifestCache.stats().evictionCount()); + assertThat(cache1).isSameAs(firstCache); + assertThat(cacheN).isSameAs(lastCache); + assertThat(manifestCache.stats().loadCount()).isEqualTo(maxIO); + assertThat(manifestCache.stats().evictionCount()).isEqualTo(maxIO - 2); } /** @@ -241,7 +245,9 @@ private HadoopCatalog hadoopCatalog(Map catalogProperties) throw "hadoop", ImmutableMap.builder() .putAll(catalogProperties) - .put(CatalogProperties.WAREHOUSE_LOCATION, temp.newFolder().getAbsolutePath()) + .put( + CatalogProperties.WAREHOUSE_LOCATION, + Files.createTempDirectory(temp, "junit").toFile().getAbsolutePath()) .buildOrThrow()); return hadoopCatalog; } diff --git a/core/src/test/java/org/apache/iceberg/TestManifestCleanup.java b/core/src/test/java/org/apache/iceberg/TestManifestCleanup.java index f7b989065be9..b5f6d05cc6a0 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestCleanup.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestCleanup.java @@ -18,84 +18,71 @@ */ package org.apache.iceberg; -import 
org.apache.iceberg.expressions.Expressions; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class TestManifestCleanup extends TableTestBase { - @Parameterized.Parameters(name = "formatVersion = {0}") - public static Object[] parameters() { - return new Object[] {1, 2}; - } +import static org.assertj.core.api.Assertions.assertThat; - public TestManifestCleanup(int formatVersion) { - super(formatVersion); +import java.util.Arrays; +import java.util.List; +import org.apache.iceberg.expressions.Expressions; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(ParameterizedTestExtension.class) +public class TestManifestCleanup extends TestBase { + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(1, 2); } - @Test + @TestTemplate public void testDelete() { - Assert.assertEquals("Table should start with no manifests", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); - Assert.assertEquals( - "Table should have one append manifest", - 1, - table.currentSnapshot().allManifests(table.io()).size()); + assertThat(table.currentSnapshot().allManifests(table.io())) + .as("Table should have one append manifest") + .hasSize(1); table.newDelete().deleteFromRowFilter(Expressions.alwaysTrue()).commit(); - Assert.assertEquals( - "Table should have one delete manifest", - 1, - table.currentSnapshot().allManifests(table.io()).size()); + assertThat(table.currentSnapshot().allManifests(table.io())) + .as("Table should have one delete manifest") + .hasSize(1); table.newAppend().commit(); - Assert.assertEquals( - "Table should have no manifests", - 0, - table.currentSnapshot().allManifests(table.io()).size()); + 
assertThat(table.currentSnapshot().allManifests(table.io())).isEmpty(); } - @Test + @TestTemplate public void testPartialDelete() { - Assert.assertEquals("Table should start with no manifests", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); Snapshot s1 = table.currentSnapshot(); - Assert.assertEquals( - "Table should have one append manifest", 1, s1.allManifests(table.io()).size()); + assertThat(s1.allManifests(table.io())).as("Table should have one append manifest").hasSize(1); table.newDelete().deleteFile(FILE_B).commit(); Snapshot s2 = table.currentSnapshot(); - Assert.assertEquals( - "Table should have one mixed manifest", 1, s2.allManifests(table.io()).size()); + assertThat(s2.allManifests(table.io())).as("Table should have one mixed manifest").hasSize(1); table.newAppend().commit(); Snapshot s3 = table.currentSnapshot(); - Assert.assertEquals( - "Table should have the same manifests", - s2.allManifests(table.io()), - s3.allManifests(table.io())); + assertThat(s3.allManifests(table.io())).isEqualTo(s2.allManifests(table.io())); } - @Test + @TestTemplate public void testOverwrite() { - Assert.assertEquals("Table should start with no manifests", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); - Assert.assertEquals( - "Table should have one append manifest", - 1, - table.currentSnapshot().allManifests(table.io()).size()); + assertThat(table.currentSnapshot().allManifests(table.io())) + .as("Table should have one append manifest") + .hasSize(1); table .newOverwrite() @@ -104,10 +91,9 @@ public void testOverwrite() { .addFile(FILE_D) .commit(); - Assert.assertEquals( - "Table should have one delete manifest and one append manifest", - 2, - table.currentSnapshot().allManifests(table.io()).size()); + assertThat(table.currentSnapshot().allManifests(table.io())) + .as("Table should 
have one delete manifest and one append manifest") + .hasSize(2); table .newOverwrite() @@ -116,9 +102,8 @@ public void testOverwrite() { .addFile(FILE_B) .commit(); - Assert.assertEquals( - "Table should have one delete manifest and one append manifest", - 2, - table.currentSnapshot().allManifests(table.io()).size()); + assertThat(table.currentSnapshot().allManifests(table.io())) + .as("Table should have one delete manifest and one append manifest") + .hasSize(2); } } diff --git a/core/src/test/java/org/apache/iceberg/TestManifestEncryption.java b/core/src/test/java/org/apache/iceberg/TestManifestEncryption.java index b64324ec573a..13e8985cdb56 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestEncryption.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestEncryption.java @@ -19,9 +19,13 @@ package org.apache.iceberg; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.file.Path; import java.util.List; import org.apache.avro.InvalidAvroMagicException; import org.apache.iceberg.encryption.EncryptedOutputFile; @@ -38,11 +42,8 @@ import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Conversions; import org.apache.iceberg.types.Types; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestManifestEncryption { private static final FileIO FILE_IO = new TestTables.LocalFileIO(); @@ -114,7 +115,7 @@ public class TestManifestEncryption { private static final EncryptionManager ENCRYPTION_MANAGER = EncryptionTestHelpers.createEncryptionManager(); - @Rule public TemporaryFolder temp = 
new TemporaryFolder(); + @TempDir private Path temp; @Test public void testV1Write() throws IOException { @@ -151,33 +152,31 @@ void checkEntry( Long expectedDataSequenceNumber, Long expectedFileSequenceNumber, FileContent content) { - Assert.assertEquals("Status", ManifestEntry.Status.ADDED, entry.status()); - Assert.assertEquals("Snapshot ID", (Long) SNAPSHOT_ID, entry.snapshotId()); - Assert.assertEquals( - "Data sequence number", expectedDataSequenceNumber, entry.dataSequenceNumber()); - Assert.assertEquals( - "File sequence number", expectedFileSequenceNumber, entry.fileSequenceNumber()); + assertThat(entry.status()).isEqualTo(ManifestEntry.Status.ADDED); + assertThat(entry.snapshotId()).isEqualTo(SNAPSHOT_ID); + assertThat(entry.dataSequenceNumber()).isEqualTo(expectedDataSequenceNumber); + assertThat(entry.fileSequenceNumber()).isEqualTo(expectedFileSequenceNumber); checkDataFile(entry.file(), content); } void checkDataFile(ContentFile dataFile, FileContent content) { // DataFile is the superclass of DeleteFile, so this method can check both - Assert.assertEquals("Content", content, dataFile.content()); - Assert.assertEquals("Path", PATH, dataFile.path()); - Assert.assertEquals("Format", FORMAT, dataFile.format()); - Assert.assertEquals("Partition", PARTITION, dataFile.partition()); - Assert.assertEquals("Record count", METRICS.recordCount(), (Long) dataFile.recordCount()); - Assert.assertEquals("Column sizes", METRICS.columnSizes(), dataFile.columnSizes()); - Assert.assertEquals("Value counts", METRICS.valueCounts(), dataFile.valueCounts()); - Assert.assertEquals("Null value counts", METRICS.nullValueCounts(), dataFile.nullValueCounts()); - Assert.assertEquals("NaN value counts", METRICS.nanValueCounts(), dataFile.nanValueCounts()); - Assert.assertEquals("Lower bounds", METRICS.lowerBounds(), dataFile.lowerBounds()); - Assert.assertEquals("Upper bounds", METRICS.upperBounds(), dataFile.upperBounds()); - Assert.assertEquals("Sort order id", SORT_ORDER_ID, 
dataFile.sortOrderId()); + assertThat(dataFile.content()).isEqualTo(content); + assertThat(dataFile.path()).isEqualTo(PATH); + assertThat(dataFile.format()).isEqualTo(FORMAT); + assertThat(dataFile.partition()).isEqualTo(PARTITION); + assertThat(dataFile.recordCount()).isEqualTo(METRICS.recordCount()); + assertThat(dataFile.columnSizes()).isEqualTo(METRICS.columnSizes()); + assertThat(dataFile.valueCounts()).isEqualTo(METRICS.valueCounts()); + assertThat(dataFile.nullValueCounts()).isEqualTo(METRICS.nullValueCounts()); + assertThat(dataFile.nanValueCounts()).isEqualTo(METRICS.nanValueCounts()); + assertThat(dataFile.lowerBounds()).isEqualTo(METRICS.lowerBounds()); + assertThat(dataFile.upperBounds()).isEqualTo(METRICS.upperBounds()); + assertThat(dataFile.sortOrderId()).isEqualTo(SORT_ORDER_ID); if (dataFile.content() == FileContent.EQUALITY_DELETES) { - Assert.assertEquals(EQUALITY_IDS, dataFile.equalityFieldIds()); + assertThat(dataFile.equalityFieldIds()).isEqualTo(EQUALITY_IDS); } else { - Assert.assertNull(dataFile.equalityFieldIds()); + assertThat(dataFile.equalityFieldIds()).isNull(); } } @@ -187,7 +186,9 @@ private ManifestFile writeManifest(int formatVersion) throws IOException { private ManifestFile writeManifest(DataFile file, int formatVersion) throws IOException { OutputFile manifestFile = - Files.localOutput(FileFormat.AVRO.addExtension(temp.newFile().toString())); + Files.localOutput( + FileFormat.AVRO.addExtension( + File.createTempFile("manifest", null, temp.toFile()).toString())); EncryptedOutputFile encryptedManifest = ENCRYPTION_MANAGER.encrypt(manifestFile); ManifestWriter writer = ManifestFiles.write(formatVersion, SPEC, encryptedManifest, SNAPSHOT_ID); @@ -201,7 +202,7 @@ private ManifestFile writeManifest(DataFile file, int formatVersion) throws IOEx private ManifestEntry readManifest(ManifestFile manifest) throws IOException { // First try to read without decryption - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> 
ManifestFiles.read( manifest, @@ -215,14 +216,16 @@ private ManifestEntry readManifest(ManifestFile manifest) throws IOExc ManifestFiles.read(manifest, EncryptingFileIO.combine(FILE_IO, ENCRYPTION_MANAGER), null) .entries()) { List> files = Lists.newArrayList(reader); - Assert.assertEquals("Should contain only one data file", 1, files.size()); + assertThat(files).hasSize(1); return files.get(0); } } private ManifestFile writeDeleteManifest(int formatVersion) throws IOException { OutputFile manifestFile = - Files.localOutput(FileFormat.AVRO.addExtension(temp.newFile().toString())); + Files.localOutput( + FileFormat.AVRO.addExtension( + File.createTempFile("manifest", null, temp.toFile()).toString())); EncryptedOutputFile encryptedManifest = ENCRYPTION_MANAGER.encrypt(manifestFile); ManifestWriter writer = ManifestFiles.writeDeleteManifest(formatVersion, SPEC, encryptedManifest, SNAPSHOT_ID); @@ -240,7 +243,7 @@ private ManifestEntry readDeleteManifest(ManifestFile manifest) thro manifest, EncryptingFileIO.combine(FILE_IO, ENCRYPTION_MANAGER), null) .entries()) { List> entries = Lists.newArrayList(reader); - Assert.assertEquals("Should contain only one delete file", 1, entries.size()); + assertThat(entries).hasSize(1); return entries.get(0); } } diff --git a/core/src/test/java/org/apache/iceberg/TestManifestListVersions.java b/core/src/test/java/org/apache/iceberg/TestManifestListVersions.java index 5b8df081c3c2..afbee9be1375 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestListVersions.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestListVersions.java @@ -18,9 +18,13 @@ */ package org.apache.iceberg; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.file.Path; import java.util.Collection; import java.util.List; import org.apache.avro.AvroRuntimeException; @@ -39,11 +43,8 
@@ import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Conversions; import org.apache.iceberg.types.Types; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestManifestListVersions { private static final String PATH = "s3://bucket/table/m1.avro"; @@ -98,11 +99,11 @@ public class TestManifestListVersions { PARTITION_SUMMARIES, KEY_METADATA); - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; @Test public void testV1WriteDeleteManifest() { - Assertions.assertThatThrownBy(() -> writeManifestList(TEST_DELETE_MANIFEST, 1)) + assertThatThrownBy(() -> writeManifestList(TEST_DELETE_MANIFEST, 1)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot store delete manifests in a v1 table"); } @@ -112,24 +113,21 @@ public void testV1Write() throws IOException { ManifestFile manifest = writeAndReadManifestList(1); // v2 fields are not written and are defaulted - Assert.assertEquals( - "Should not contain sequence number, default to 0", 0, manifest.sequenceNumber()); - Assert.assertEquals( - "Should not contain min sequence number, default to 0", 0, manifest.minSequenceNumber()); + assertThat(manifest.sequenceNumber()).isEqualTo(0); + assertThat(manifest.minSequenceNumber()).isEqualTo(0); // v1 fields are read correctly, even though order changed - Assert.assertEquals("Path", PATH, manifest.path()); - Assert.assertEquals("Length", LENGTH, manifest.length()); - Assert.assertEquals("Spec id", SPEC_ID, manifest.partitionSpecId()); - Assert.assertEquals("Content", ManifestContent.DATA, manifest.content()); - Assert.assertEquals("Snapshot id", SNAPSHOT_ID, (long) manifest.snapshotId()); - Assert.assertEquals("Added files count", ADDED_FILES, (int) manifest.addedFilesCount()); - 
Assert.assertEquals( - "Existing files count", EXISTING_FILES, (int) manifest.existingFilesCount()); - Assert.assertEquals("Deleted files count", DELETED_FILES, (int) manifest.deletedFilesCount()); - Assert.assertEquals("Added rows count", ADDED_ROWS, (long) manifest.addedRowsCount()); - Assert.assertEquals("Existing rows count", EXISTING_ROWS, (long) manifest.existingRowsCount()); - Assert.assertEquals("Deleted rows count", DELETED_ROWS, (long) manifest.deletedRowsCount()); + assertThat(manifest.path()).isEqualTo(PATH); + assertThat(manifest.length()).isEqualTo(LENGTH); + assertThat(manifest.partitionSpecId()).isEqualTo(SPEC_ID); + assertThat(manifest.content()).isEqualTo(ManifestContent.DATA); + assertThat(manifest.snapshotId()).isEqualTo(SNAPSHOT_ID); + assertThat(manifest.addedFilesCount()).isEqualTo(ADDED_FILES); + assertThat(manifest.existingFilesCount()).isEqualTo(EXISTING_FILES); + assertThat(manifest.deletedFilesCount()).isEqualTo(DELETED_FILES); + assertThat(manifest.addedRowsCount()).isEqualTo(ADDED_ROWS); + assertThat(manifest.existingRowsCount()).isEqualTo(EXISTING_ROWS); + assertThat(manifest.deletedRowsCount()).isEqualTo(DELETED_ROWS); } @Test @@ -137,20 +135,19 @@ public void testV2Write() throws IOException { ManifestFile manifest = writeAndReadManifestList(2); // all v2 fields should be read correctly - Assert.assertEquals("Path", PATH, manifest.path()); - Assert.assertEquals("Length", LENGTH, manifest.length()); - Assert.assertEquals("Spec id", SPEC_ID, manifest.partitionSpecId()); - Assert.assertEquals("Content", ManifestContent.DATA, manifest.content()); - Assert.assertEquals("Sequence number", SEQ_NUM, manifest.sequenceNumber()); - Assert.assertEquals("Min sequence number", MIN_SEQ_NUM, manifest.minSequenceNumber()); - Assert.assertEquals("Snapshot id", SNAPSHOT_ID, (long) manifest.snapshotId()); - Assert.assertEquals("Added files count", ADDED_FILES, (int) manifest.addedFilesCount()); - Assert.assertEquals("Added rows count", ADDED_ROWS, 
(long) manifest.addedRowsCount()); - Assert.assertEquals( - "Existing files count", EXISTING_FILES, (int) manifest.existingFilesCount()); - Assert.assertEquals("Existing rows count", EXISTING_ROWS, (long) manifest.existingRowsCount()); - Assert.assertEquals("Deleted files count", DELETED_FILES, (int) manifest.deletedFilesCount()); - Assert.assertEquals("Deleted rows count", DELETED_ROWS, (long) manifest.deletedRowsCount()); + assertThat(manifest.path()).isEqualTo(PATH); + assertThat(manifest.length()).isEqualTo(LENGTH); + assertThat(manifest.partitionSpecId()).isEqualTo(SPEC_ID); + assertThat(manifest.content()).isEqualTo(ManifestContent.DATA); + assertThat(manifest.sequenceNumber()).isEqualTo(SEQ_NUM); + assertThat(manifest.minSequenceNumber()).isEqualTo(MIN_SEQ_NUM); + assertThat(manifest.snapshotId()).isEqualTo(SNAPSHOT_ID); + assertThat(manifest.addedFilesCount()).isEqualTo(ADDED_FILES); + assertThat(manifest.addedRowsCount()).isEqualTo(ADDED_ROWS); + assertThat(manifest.existingFilesCount()).isEqualTo(EXISTING_FILES); + assertThat(manifest.existingRowsCount()).isEqualTo(EXISTING_ROWS); + assertThat(manifest.deletedFilesCount()).isEqualTo(DELETED_FILES); + assertThat(manifest.deletedRowsCount()).isEqualTo(DELETED_ROWS); } @Test @@ -159,20 +156,16 @@ public void testV1ForwardCompatibility() throws IOException { GenericData.Record generic = readGeneric(manifestList, V1Metadata.MANIFEST_LIST_SCHEMA); // v1 metadata should match even though order changed - Assert.assertEquals("Path", PATH, generic.get("manifest_path").toString()); - Assert.assertEquals("Length", LENGTH, generic.get("manifest_length")); - Assert.assertEquals("Spec id", SPEC_ID, generic.get("partition_spec_id")); - Assert.assertEquals("Snapshot id", SNAPSHOT_ID, (long) generic.get("added_snapshot_id")); - Assert.assertEquals("Added files count", ADDED_FILES, (int) generic.get("added_files_count")); - Assert.assertEquals( - "Existing files count", EXISTING_FILES, (int) 
generic.get("existing_files_count")); - Assert.assertEquals( - "Deleted files count", DELETED_FILES, (int) generic.get("deleted_files_count")); - Assert.assertEquals("Added rows count", ADDED_ROWS, (long) generic.get("added_rows_count")); - Assert.assertEquals( - "Existing rows count", EXISTING_ROWS, (long) generic.get("existing_rows_count")); - Assert.assertEquals( - "Deleted rows count", DELETED_ROWS, (long) generic.get("deleted_rows_count")); + assertThat(generic.get("manifest_path")).asString().isEqualTo(PATH); + assertThat(generic.get("manifest_length")).isEqualTo(LENGTH); + assertThat(generic.get("partition_spec_id")).isEqualTo(SPEC_ID); + assertThat(generic.get("added_snapshot_id")).isEqualTo(SNAPSHOT_ID); + assertThat(generic.get("added_files_count")).isEqualTo(ADDED_FILES); + assertThat(generic.get("existing_files_count")).isEqualTo(EXISTING_FILES); + assertThat(generic.get("deleted_files_count")).isEqualTo(DELETED_FILES); + assertThat(generic.get("added_rows_count")).isEqualTo(ADDED_ROWS); + assertThat(generic.get("existing_rows_count")).isEqualTo(EXISTING_ROWS); + assertThat(generic.get("deleted_rows_count")).isEqualTo(DELETED_ROWS); assertEmptyAvroField(generic, ManifestFile.MANIFEST_CONTENT.name()); assertEmptyAvroField(generic, ManifestFile.SEQUENCE_NUMBER.name()); assertEmptyAvroField(generic, ManifestFile.MIN_SEQUENCE_NUMBER.name()); @@ -186,20 +179,16 @@ public void testV2ForwardCompatibility() throws IOException { GenericData.Record generic = readGeneric(manifestList, V1Metadata.MANIFEST_LIST_SCHEMA); // v1 metadata should match even though order changed - Assert.assertEquals("Path", PATH, generic.get("manifest_path").toString()); - Assert.assertEquals("Length", LENGTH, generic.get("manifest_length")); - Assert.assertEquals("Spec id", SPEC_ID, generic.get("partition_spec_id")); - Assert.assertEquals("Snapshot id", SNAPSHOT_ID, (long) generic.get("added_snapshot_id")); - Assert.assertEquals("Added files count", ADDED_FILES, (int) 
generic.get("added_files_count")); - Assert.assertEquals( - "Existing files count", EXISTING_FILES, (int) generic.get("existing_files_count")); - Assert.assertEquals( - "Deleted files count", DELETED_FILES, (int) generic.get("deleted_files_count")); - Assert.assertEquals("Added rows count", ADDED_ROWS, (long) generic.get("added_rows_count")); - Assert.assertEquals( - "Existing rows count", EXISTING_ROWS, (long) generic.get("existing_rows_count")); - Assert.assertEquals( - "Deleted rows count", DELETED_ROWS, (long) generic.get("deleted_rows_count")); + assertThat(generic.get("manifest_path")).asString().isEqualTo(PATH); + assertThat(generic.get("manifest_length")).isEqualTo(LENGTH); + assertThat(generic.get("partition_spec_id")).isEqualTo(SPEC_ID); + assertThat(generic.get("added_snapshot_id")).isEqualTo(SNAPSHOT_ID); + assertThat(generic.get("added_files_count")).isEqualTo(ADDED_FILES); + assertThat(generic.get("existing_files_count")).isEqualTo(EXISTING_FILES); + assertThat(generic.get("deleted_files_count")).isEqualTo(DELETED_FILES); + assertThat(generic.get("added_rows_count")).isEqualTo(ADDED_ROWS); + assertThat(generic.get("existing_rows_count")).isEqualTo(EXISTING_ROWS); + assertThat(generic.get("deleted_rows_count")).isEqualTo(DELETED_ROWS); assertEmptyAvroField(generic, ManifestFile.MANIFEST_CONTENT.name()); assertEmptyAvroField(generic, ManifestFile.SEQUENCE_NUMBER.name()); assertEmptyAvroField(generic, ManifestFile.MIN_SEQUENCE_NUMBER.name()); @@ -207,8 +196,8 @@ public void testV2ForwardCompatibility() throws IOException { @Test public void testManifestsWithoutRowStats() throws IOException { - File manifestListFile = temp.newFile("manifest-list.avro"); - Assert.assertTrue(manifestListFile.delete()); + File manifestListFile = File.createTempFile("manifest-list", ".avro", temp.toFile()); + assertThat(manifestListFile.delete()).isTrue(); Collection columnNamesWithoutRowStats = ImmutableList.of( @@ -250,18 +239,15 @@ public void 
testManifestsWithoutRowStats() throws IOException { List files = ManifestLists.read(outputFile.toInputFile()); ManifestFile manifest = Iterables.getOnlyElement(files); - Assert.assertTrue("Added files should be present", manifest.hasAddedFiles()); - Assert.assertEquals("Added files count should match", 2, (int) manifest.addedFilesCount()); - Assert.assertNull("Added rows count should be null", manifest.addedRowsCount()); - - Assert.assertTrue("Existing files should be present", manifest.hasExistingFiles()); - Assert.assertEquals( - "Existing files count should match", 3, (int) manifest.existingFilesCount()); - Assert.assertNull("Existing rows count should be null", manifest.existingRowsCount()); - - Assert.assertTrue("Deleted files should be present", manifest.hasDeletedFiles()); - Assert.assertEquals("Deleted files count should match", 4, (int) manifest.deletedFilesCount()); - Assert.assertNull("Deleted rows count should be null", manifest.deletedRowsCount()); + assertThat(manifest.hasAddedFiles()).isTrue(); + assertThat(manifest.addedFilesCount()).isEqualTo(2); + assertThat(manifest.addedRowsCount()).isNull(); + assertThat(manifest.hasExistingFiles()).isTrue(); + assertThat(manifest.existingFilesCount()).isEqualTo(3); + assertThat(manifest.existingRowsCount()).isNull(); + assertThat(manifest.hasDeletedFiles()).isTrue(); + assertThat(manifest.deletedFilesCount()).isEqualTo(4); + assertThat(manifest.deletedRowsCount()).isNull(); } @Test @@ -298,36 +284,19 @@ public void testManifestsPartitionSummary() throws IOException { List files = ManifestLists.read(manifestList); ManifestFile returnedManifest = Iterables.getOnlyElement(files); - Assert.assertEquals( - "Number of partition field summaries should match", - 2, - returnedManifest.partitions().size()); + assertThat(returnedManifest.partitions()).hasSize(2); ManifestFile.PartitionFieldSummary first = returnedManifest.partitions().get(0); - Assert.assertFalse( - "First partition field summary should not contain 
null", first.containsNull()); - Assert.assertNull("First partition field summary has unknown NaN", first.containsNaN()); - Assert.assertEquals( - "Lower bound for first partition field summary should match", - firstSummaryLowerBound, - first.lowerBound()); - Assert.assertEquals( - "Upper bound for first partition field summary should match", - firstSummaryUpperBound, - first.upperBound()); + assertThat(first.containsNull()).isFalse(); + assertThat(first.containsNaN()).isNull(); + assertThat(first.lowerBound()).isEqualTo(firstSummaryLowerBound); + assertThat(first.upperBound()).isEqualTo(firstSummaryUpperBound); ManifestFile.PartitionFieldSummary second = returnedManifest.partitions().get(1); - Assert.assertTrue("Second partition field summary should contain null", second.containsNull()); - Assert.assertFalse( - "Second partition field summary should not contain NaN", second.containsNaN()); - Assert.assertEquals( - "Lower bound for second partition field summary should match", - secondSummaryLowerBound, - second.lowerBound()); - Assert.assertEquals( - "Upper bound for second partition field summary should match", - secondSummaryUpperBound, - second.upperBound()); + assertThat(second.containsNull()).isTrue(); + assertThat(second.containsNaN()).isFalse(); + assertThat(second.lowerBound()).isEqualTo(secondSummaryLowerBound); + assertThat(second.upperBound()).isEqualTo(secondSummaryUpperBound); } private InputFile writeManifestList(ManifestFile manifest, int formatVersion) throws IOException { @@ -348,7 +317,7 @@ private GenericData.Record readGeneric(InputFile manifestList, Schema schema) th try (CloseableIterable files = Avro.read(manifestList).project(schema).reuseContainers(false).build()) { List records = Lists.newLinkedList(files); - Assert.assertEquals("Should contain one manifest", 1, records.size()); + assertThat(records).hasSize(1); return records.get(0); } } @@ -356,12 +325,12 @@ private GenericData.Record readGeneric(InputFile manifestList, Schema schema) th 
private ManifestFile writeAndReadManifestList(int formatVersion) throws IOException { List manifests = ManifestLists.read(writeManifestList(TEST_MANIFEST, formatVersion)); - Assert.assertEquals("Should contain one manifest", 1, manifests.size()); + assertThat(manifests).hasSize(1); return manifests.get(0); } private void assertEmptyAvroField(GenericRecord record, String field) { - Assertions.assertThatThrownBy(() -> record.get(field)) + assertThatThrownBy(() -> record.get(field)) .isInstanceOf(AvroRuntimeException.class) .hasMessage("Not a valid schema field: " + field); } diff --git a/core/src/test/java/org/apache/iceberg/TestManifestReaderStats.java b/core/src/test/java/org/apache/iceberg/TestManifestReaderStats.java index 082800238a0a..4c60a943f76c 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestReaderStats.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestReaderStats.java @@ -18,8 +18,13 @@ */ package org.apache.iceberg; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.List; import java.util.Map; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.io.CloseableIterable; @@ -27,21 +32,14 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.types.Conversions; import org.apache.iceberg.types.Types; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class TestManifestReaderStats extends TableTestBase { - @Parameterized.Parameters(name = "formatVersion = {0}") - public static Object[] parameters() { - return new Object[] {1, 2}; - } - - public TestManifestReaderStats(int formatVersion) { - super(formatVersion); +import 
org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(ParameterizedTestExtension.class) +public class TestManifestReaderStats extends TestBase { + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(1, 2); } private static final Map VALUE_COUNT = ImmutableMap.of(3, 3L); @@ -65,7 +63,7 @@ public TestManifestReaderStats(int formatVersion) { .withMetrics(METRICS) .build(); - @Test + @TestTemplate public void testReadIncludesFullStats() throws IOException { ManifestFile manifest = writeManifest(1000L, FILE); try (ManifestReader reader = ManifestFiles.read(manifest, FILE_IO)) { @@ -75,7 +73,7 @@ public void testReadIncludesFullStats() throws IOException { } } - @Test + @TestTemplate public void testReadEntriesWithFilterIncludesFullStats() throws IOException { ManifestFile manifest = writeManifest(1000L, FILE); try (ManifestReader reader = @@ -86,7 +84,7 @@ public void testReadEntriesWithFilterIncludesFullStats() throws IOException { } } - @Test + @TestTemplate public void testReadIteratorWithFilterIncludesFullStats() throws IOException { ManifestFile manifest = writeManifest(1000L, FILE); try (ManifestReader reader = @@ -96,7 +94,7 @@ public void testReadIteratorWithFilterIncludesFullStats() throws IOException { } } - @Test + @TestTemplate public void testReadEntriesWithFilterAndSelectIncludesFullStats() throws IOException { ManifestFile manifest = writeManifest(1000L, FILE); try (ManifestReader reader = @@ -109,7 +107,7 @@ public void testReadEntriesWithFilterAndSelectIncludesFullStats() throws IOExcep } } - @Test + @TestTemplate public void testReadIteratorWithFilterAndSelectDropsStats() throws IOException { ManifestFile manifest = writeManifest(1000L, FILE); try (ManifestReader reader = @@ -121,7 +119,7 @@ public void testReadIteratorWithFilterAndSelectDropsStats() throws IOException { } } - @Test + @TestTemplate public void 
testReadIteratorWithFilterAndSelectRecordCountDropsStats() throws IOException { ManifestFile manifest = writeManifest(1000L, FILE); try (ManifestReader reader = @@ -133,7 +131,7 @@ public void testReadIteratorWithFilterAndSelectRecordCountDropsStats() throws IO } } - @Test + @TestTemplate public void testReadIteratorWithFilterAndSelectStatsIncludesFullStats() throws IOException { ManifestFile manifest = writeManifest(1000L, FILE); try (ManifestReader reader = @@ -148,7 +146,7 @@ public void testReadIteratorWithFilterAndSelectStatsIncludesFullStats() throws I } } - @Test + @TestTemplate public void testReadIteratorWithProjectStats() throws IOException { ManifestFile manifest = writeManifest(1000L, FILE); try (ManifestReader reader = @@ -156,18 +154,18 @@ public void testReadIteratorWithProjectStats() throws IOException { .project(new Schema(ImmutableList.of(DataFile.FILE_PATH, DataFile.VALUE_COUNTS)))) { DataFile entry = reader.iterator().next(); - Assert.assertEquals(FILE_PATH, entry.path()); - Assert.assertEquals(VALUE_COUNT, entry.valueCounts()); - Assert.assertNull(entry.columnSizes()); - Assert.assertNull(entry.nullValueCounts()); - Assert.assertNull(entry.nanValueCounts()); - Assert.assertNull(entry.lowerBounds()); - Assert.assertNull(entry.upperBounds()); + assertThat(entry.path()).isEqualTo(FILE_PATH); + assertThat(entry.valueCounts()).isEqualTo(VALUE_COUNT); + assertThat(entry.columnSizes()).isNull(); + assertThat(entry.nullValueCounts()).isNull(); + assertThat(entry.nanValueCounts()).isNull(); + assertThat(entry.lowerBounds()).isNull(); + assertThat(entry.upperBounds()).isNull(); assertNullRecordCount(entry); } } - @Test + @TestTemplate public void testReadEntriesWithSelectNotProjectStats() throws IOException { ManifestFile manifest = writeManifest(1000L, FILE); try (ManifestReader reader = @@ -177,20 +175,20 @@ public void testReadEntriesWithSelectNotProjectStats() throws IOException { DataFile dataFile = entry.file(); // selected field is populated - 
Assert.assertEquals(FILE_PATH, dataFile.path()); + assertThat(dataFile.path()).isEqualTo(FILE_PATH); // not selected fields are all null and not projected - Assert.assertNull(dataFile.columnSizes()); - Assert.assertNull(dataFile.valueCounts()); - Assert.assertNull(dataFile.nullValueCounts()); - Assert.assertNull(dataFile.lowerBounds()); - Assert.assertNull(dataFile.upperBounds()); - Assert.assertNull(dataFile.nanValueCounts()); + assertThat(dataFile.columnSizes()).isNull(); + assertThat(dataFile.valueCounts()).isNull(); + assertThat(dataFile.nullValueCounts()).isNull(); + assertThat(dataFile.nanValueCounts()).isNull(); + assertThat(dataFile.lowerBounds()).isNull(); + assertThat(dataFile.upperBounds()).isNull(); assertNullRecordCount(dataFile); } } - @Test + @TestTemplate public void testReadEntriesWithSelectCertainStatNotProjectStats() throws IOException { ManifestFile manifest = writeManifest(1000L, FILE); try (ManifestReader reader = @@ -199,82 +197,76 @@ public void testReadEntriesWithSelectCertainStatNotProjectStats() throws IOExcep DataFile dataFile = reader.iterator().next(); // selected fields are populated - Assert.assertEquals(VALUE_COUNT, dataFile.valueCounts()); - Assert.assertEquals(FILE_PATH, dataFile.path()); + assertThat(dataFile.path()).isEqualTo(FILE_PATH); + assertThat(dataFile.valueCounts()).isEqualTo(VALUE_COUNT); // not selected fields are all null and not projected - Assert.assertNull(dataFile.columnSizes()); - Assert.assertNull(dataFile.nullValueCounts()); - Assert.assertNull(dataFile.nanValueCounts()); - Assert.assertNull(dataFile.lowerBounds()); - Assert.assertNull(dataFile.upperBounds()); + assertThat(dataFile.columnSizes()).isNull(); + assertThat(dataFile.nullValueCounts()).isNull(); + assertThat(dataFile.nanValueCounts()).isNull(); + assertThat(dataFile.lowerBounds()).isNull(); + assertThat(dataFile.upperBounds()).isNull(); assertNullRecordCount(dataFile); } } private void assertFullStats(DataFile dataFile) { - Assert.assertEquals(3, 
dataFile.recordCount()); - Assert.assertNull(dataFile.columnSizes()); - Assert.assertEquals(VALUE_COUNT, dataFile.valueCounts()); - Assert.assertEquals(NULL_VALUE_COUNTS, dataFile.nullValueCounts()); - Assert.assertEquals(NAN_VALUE_COUNTS, dataFile.nanValueCounts()); - Assert.assertEquals(LOWER_BOUNDS, dataFile.lowerBounds()); - Assert.assertEquals(UPPER_BOUNDS, dataFile.upperBounds()); + assertThat(dataFile.recordCount()).isEqualTo(3); + assertThat(dataFile.columnSizes()).isNull(); + assertThat(dataFile.valueCounts()).isEqualTo(VALUE_COUNT); + assertThat(dataFile.nullValueCounts()).isEqualTo(NULL_VALUE_COUNTS); + assertThat(dataFile.nanValueCounts()).isEqualTo(NAN_VALUE_COUNTS); + assertThat(dataFile.lowerBounds()).isEqualTo(LOWER_BOUNDS); + assertThat(dataFile.upperBounds()).isEqualTo(UPPER_BOUNDS); if (dataFile.valueCounts() != null) { - Assertions.assertThatThrownBy( - () -> dataFile.valueCounts().clear(), "Should not be modifiable") + assertThatThrownBy(() -> dataFile.valueCounts().clear(), "Should not be modifiable") .isInstanceOf(UnsupportedOperationException.class); } if (dataFile.nullValueCounts() != null) { - Assertions.assertThatThrownBy( - () -> dataFile.nullValueCounts().clear(), "Should not be modifiable") + assertThatThrownBy(() -> dataFile.nullValueCounts().clear(), "Should not be modifiable") .isInstanceOf(UnsupportedOperationException.class); } if (dataFile.nanValueCounts() != null) { - Assertions.assertThatThrownBy( - () -> dataFile.nanValueCounts().clear(), "Should not be modifiable") + assertThatThrownBy(() -> dataFile.nanValueCounts().clear(), "Should not be modifiable") .isInstanceOf(UnsupportedOperationException.class); } if (dataFile.upperBounds() != null) { - Assertions.assertThatThrownBy( - () -> dataFile.upperBounds().clear(), "Should not be modifiable") + assertThatThrownBy(() -> dataFile.upperBounds().clear(), "Should not be modifiable") .isInstanceOf(UnsupportedOperationException.class); } if (dataFile.lowerBounds() != null) { - 
Assertions.assertThatThrownBy( - () -> dataFile.lowerBounds().clear(), "Should not be modifiable") + assertThatThrownBy(() -> dataFile.lowerBounds().clear(), "Should not be modifiable") .isInstanceOf(UnsupportedOperationException.class); } if (dataFile.columnSizes() != null) { - Assertions.assertThatThrownBy( - () -> dataFile.columnSizes().clear(), "Should not be modifiable") + assertThatThrownBy(() -> dataFile.columnSizes().clear(), "Should not be modifiable") .isInstanceOf(UnsupportedOperationException.class); } - Assert.assertEquals(FILE_PATH, dataFile.path()); // always select file path in all test cases + assertThat(dataFile.path()).isEqualTo(FILE_PATH); // always select file path in all test cases } private void assertStatsDropped(DataFile dataFile) { - Assert.assertEquals( - 3, dataFile.recordCount()); // record count is not considered as droppable stats - Assert.assertNull(dataFile.columnSizes()); - Assert.assertNull(dataFile.valueCounts()); - Assert.assertNull(dataFile.nullValueCounts()); - Assert.assertNull(dataFile.lowerBounds()); - Assert.assertNull(dataFile.upperBounds()); - Assert.assertNull(dataFile.nanValueCounts()); - - Assert.assertEquals(FILE_PATH, dataFile.path()); // always select file path in all test cases + assertThat(dataFile.recordCount()) + .isEqualTo(3); // record count is not considered as droppable stats + assertThat(dataFile.columnSizes()).isNull(); + assertThat(dataFile.valueCounts()).isNull(); + assertThat(dataFile.nullValueCounts()).isNull(); + assertThat(dataFile.nanValueCounts()).isNull(); + assertThat(dataFile.lowerBounds()).isNull(); + assertThat(dataFile.upperBounds()).isNull(); + + assertThat(dataFile.path()).isEqualTo(FILE_PATH); // always select file path in all test cases } private void assertNullRecordCount(DataFile dataFile) { // record count is a primitive type, accessing null record count will throw NPE - Assertions.assertThatThrownBy(dataFile::recordCount).isInstanceOf(NullPointerException.class); + 
assertThatThrownBy(dataFile::recordCount).isInstanceOf(NullPointerException.class); } } diff --git a/core/src/test/java/org/apache/iceberg/TestManifestWriter.java b/core/src/test/java/org/apache/iceberg/TestManifestWriter.java index 17a41f418a8e..eb7910a79fc5 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestWriter.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestWriter.java @@ -18,38 +18,33 @@ */ package org.apache.iceberg; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assumptions.assumeThat; + import java.io.File; import java.io.IOException; import java.io.UncheckedIOException; +import java.util.Arrays; import java.util.List; import java.util.UUID; import org.apache.iceberg.ManifestEntry.Status; import org.apache.iceberg.io.OutputFile; import org.apache.iceberg.types.Conversions; import org.apache.iceberg.types.Types; -import org.assertj.core.api.Assertions; -import org.assertj.core.api.Assumptions; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class TestManifestWriter extends TableTestBase { - @Parameterized.Parameters(name = "formatVersion = {0}") - public static Object[] parameters() { - return new Object[] {1, 2}; - } - - public TestManifestWriter(int formatVersion) { - super(formatVersion); +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(ParameterizedTestExtension.class) +public class TestManifestWriter extends TestBase { + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(1, 2); } private static final int FILE_SIZE_CHECK_ROWS_DIVISOR = 250; private static final long SMALL_FILE_SIZE = 10L; - @Test + @TestTemplate public void testManifestStats() throws IOException { ManifestFile manifest = writeManifest( @@ -64,22 +59,18 @@ 
public void testManifestStats() throws IOException { manifestEntry(Status.DELETED, null, newFile(5)), manifestEntry(Status.DELETED, null, newFile(2))); - Assert.assertTrue("Added files should be present", manifest.hasAddedFiles()); - Assert.assertEquals("Added files count should match", 4, (int) manifest.addedFilesCount()); - Assert.assertEquals("Added rows count should match", 40L, (long) manifest.addedRowsCount()); - - Assert.assertTrue("Existing files should be present", manifest.hasExistingFiles()); - Assert.assertEquals( - "Existing files count should match", 3, (int) manifest.existingFilesCount()); - Assert.assertEquals( - "Existing rows count should match", 26L, (long) manifest.existingRowsCount()); - - Assert.assertTrue("Deleted files should be present", manifest.hasDeletedFiles()); - Assert.assertEquals("Deleted files count should match", 2, (int) manifest.deletedFilesCount()); - Assert.assertEquals("Deleted rows count should match", 7L, (long) manifest.deletedRowsCount()); + assertThat(manifest.hasAddedFiles()).isTrue(); + assertThat(manifest.addedFilesCount()).isEqualTo(4); + assertThat(manifest.addedRowsCount()).isEqualTo(40); + assertThat(manifest.hasExistingFiles()).isTrue(); + assertThat(manifest.existingFilesCount()).isEqualTo(3); + assertThat(manifest.existingRowsCount()).isEqualTo(26); + assertThat(manifest.hasDeletedFiles()).isTrue(); + assertThat(manifest.deletedFilesCount()).isEqualTo(2); + assertThat(manifest.deletedRowsCount()).isEqualTo(7); } - @Test + @TestTemplate public void testManifestPartitionStats() throws IOException { ManifestFile manifest = writeManifest( @@ -89,48 +80,44 @@ public void testManifestPartitionStats() throws IOException { manifestEntry(Status.DELETED, null, newFile(2, TestHelpers.Row.of(3)))); List partitions = manifest.partitions(); - Assert.assertEquals("Partition field summaries count should match", 1, partitions.size()); + assertThat(partitions).hasSize(1); ManifestFile.PartitionFieldSummary partitionFieldSummary 
= partitions.get(0); - Assert.assertFalse("contains_null should be false", partitionFieldSummary.containsNull()); - Assert.assertFalse("contains_nan should be false", partitionFieldSummary.containsNaN()); - Assert.assertEquals( - "Lower bound should match", - Integer.valueOf(1), - Conversions.fromByteBuffer(Types.IntegerType.get(), partitionFieldSummary.lowerBound())); - Assert.assertEquals( - "Upper bound should match", - Integer.valueOf(3), - Conversions.fromByteBuffer(Types.IntegerType.get(), partitionFieldSummary.upperBound())); + assertThat(partitionFieldSummary.containsNull()).isFalse(); + assertThat(partitionFieldSummary.containsNaN()).isFalse(); + assertThat( + (Integer) + Conversions.fromByteBuffer( + Types.IntegerType.get(), partitionFieldSummary.lowerBound())) + .isEqualTo(1); + assertThat( + (Integer) + Conversions.fromByteBuffer( + Types.IntegerType.get(), partitionFieldSummary.upperBound())) + .isEqualTo(3); } - @Test + @TestTemplate public void testWriteManifestWithSequenceNumber() throws IOException { - Assume.assumeTrue("sequence number is only valid for format version > 1", formatVersion > 1); - File manifestFile = temp.newFile("manifest.avro"); - Assert.assertTrue(manifestFile.delete()); + assumeThat(formatVersion).isGreaterThan(1); + File manifestFile = File.createTempFile("manifest", ".avro", temp.toFile()); + assertThat(manifestFile.delete()).isTrue(); OutputFile outputFile = table.ops().io().newOutputFile(manifestFile.getCanonicalPath()); ManifestWriter writer = ManifestFiles.write(formatVersion, table.spec(), outputFile, 1L); writer.add(newFile(10, TestHelpers.Row.of(1)), 1000L); writer.close(); ManifestFile manifest = writer.toManifestFile(); - Assert.assertEquals("Manifest should have no sequence number", -1L, manifest.sequenceNumber()); + assertThat(manifest.sequenceNumber()).isEqualTo(-1); ManifestReader manifestReader = ManifestFiles.read(manifest, table.io()); for (ManifestEntry entry : manifestReader.entries()) { - 
Assert.assertEquals( - "Custom data sequence number should be used for all manifest entries", - 1000L, - (long) entry.dataSequenceNumber()); - Assert.assertEquals( - "File sequence number must be unassigned", - ManifestWriter.UNASSIGNED_SEQ, - entry.fileSequenceNumber().longValue()); + assertThat(entry.dataSequenceNumber()).isEqualTo(1000); + assertThat(entry.fileSequenceNumber()).isEqualTo(ManifestWriter.UNASSIGNED_SEQ); } } - @Test + @TestTemplate public void testCommitManifestWithExplicitDataSequenceNumber() throws IOException { - Assume.assumeTrue("Sequence numbers are valid for format version > 1", formatVersion > 1); + assumeThat(formatVersion).isGreaterThan(1); DataFile file1 = newFile(50); DataFile file2 = newFile(50); @@ -143,10 +130,7 @@ public void testCommitManifestWithExplicitDataSequenceNumber() throws IOExceptio manifestEntry(Status.ADDED, null, dataSequenceNumber, null, file1), manifestEntry(Status.ADDED, null, dataSequenceNumber, null, file2)); - Assert.assertEquals( - "Manifest should have no sequence number before commit", - ManifestWriter.UNASSIGNED_SEQ, - manifest.sequenceNumber()); + assertThat(manifest.sequenceNumber()).isEqualTo(ManifestWriter.UNASSIGNED_SEQ); table.newFastAppend().appendManifest(manifest).commit(); @@ -154,15 +138,9 @@ public void testCommitManifestWithExplicitDataSequenceNumber() throws IOExceptio ManifestFile committedManifest = table.currentSnapshot().dataManifests(table.io()).get(0); - Assert.assertEquals( - "Committed manifest sequence number must be correct", - 1L, - committedManifest.sequenceNumber()); + assertThat(committedManifest.sequenceNumber()).isEqualTo(1); - Assert.assertEquals( - "Committed manifest min sequence number must be correct", - dataSequenceNumber, - committedManifest.minSequenceNumber()); + assertThat(committedManifest.minSequenceNumber()).isEqualTo(dataSequenceNumber); validateManifest( committedManifest, @@ -173,9 +151,9 @@ public void testCommitManifestWithExplicitDataSequenceNumber() throws 
IOExceptio statuses(Status.ADDED, Status.ADDED)); } - @Test + @TestTemplate public void testCommitManifestWithExistingEntriesWithoutFileSequenceNumber() throws IOException { - Assume.assumeTrue("Sequence numbers are valid for format version > 1", formatVersion > 1); + assumeThat(formatVersion).isGreaterThan(1); DataFile file1 = newFile(50); DataFile file2 = newFile(50); @@ -194,10 +172,7 @@ public void testCommitManifestWithExistingEntriesWithoutFileSequenceNumber() thr manifestEntry(Status.EXISTING, appendSnapshotId, appendSequenceNumber, null, file1), manifestEntry(Status.EXISTING, appendSnapshotId, appendSequenceNumber, null, file2)); - Assert.assertEquals( - "Manifest should have no sequence number before commit", - ManifestWriter.UNASSIGNED_SEQ, - newManifest.sequenceNumber()); + assertThat(newManifest.sequenceNumber()).isEqualTo(ManifestWriter.UNASSIGNED_SEQ); table.rewriteManifests().deleteManifest(originalManifest).addManifest(newManifest).commit(); @@ -205,15 +180,9 @@ public void testCommitManifestWithExistingEntriesWithoutFileSequenceNumber() thr ManifestFile committedManifest = table.currentSnapshot().dataManifests(table.io()).get(0); - Assert.assertEquals( - "Committed manifest sequence number must be correct", - rewriteSnapshot.sequenceNumber(), - committedManifest.sequenceNumber()); + assertThat(committedManifest.sequenceNumber()).isEqualTo(rewriteSnapshot.sequenceNumber()); - Assert.assertEquals( - "Committed manifest min sequence number must be correct", - appendSequenceNumber, - committedManifest.minSequenceNumber()); + assertThat(committedManifest.minSequenceNumber()).isEqualTo(appendSequenceNumber); validateManifest( committedManifest, @@ -224,30 +193,30 @@ public void testCommitManifestWithExistingEntriesWithoutFileSequenceNumber() thr statuses(Status.EXISTING, Status.EXISTING)); } - @Test + @TestTemplate public void testRollingManifestWriterNoRecords() throws IOException { RollingManifestWriter writer = 
newRollingWriteManifest(SMALL_FILE_SIZE); writer.close(); - Assertions.assertThat(writer.toManifestFiles()).isEmpty(); + assertThat(writer.toManifestFiles()).isEmpty(); writer.close(); - Assertions.assertThat(writer.toManifestFiles()).isEmpty(); + assertThat(writer.toManifestFiles()).isEmpty(); } - @Test + @TestTemplate public void testRollingDeleteManifestWriterNoRecords() throws IOException { - Assumptions.assumeThat(formatVersion).isGreaterThan(1); + assumeThat(formatVersion).isGreaterThan(1); RollingManifestWriter writer = newRollingWriteDeleteManifest(SMALL_FILE_SIZE); writer.close(); - Assertions.assertThat(writer.toManifestFiles()).isEmpty(); + assertThat(writer.toManifestFiles()).isEmpty(); writer.close(); - Assertions.assertThat(writer.toManifestFiles()).isEmpty(); + assertThat(writer.toManifestFiles()).isEmpty(); } - @Test + @TestTemplate public void testRollingManifestWriterSplitFiles() throws IOException { RollingManifestWriter writer = newRollingWriteManifest(SMALL_FILE_SIZE); @@ -278,7 +247,7 @@ public void testRollingManifestWriterSplitFiles() throws IOException { writer.close(); List manifestFiles = writer.toManifestFiles(); - Assertions.assertThat(manifestFiles.size()).isEqualTo(3); + assertThat(manifestFiles).hasSize(3); checkManifests( manifestFiles, @@ -291,7 +260,7 @@ public void testRollingManifestWriterSplitFiles() throws IOException { writer.close(); manifestFiles = writer.toManifestFiles(); - Assertions.assertThat(manifestFiles.size()).isEqualTo(3); + assertThat(manifestFiles).hasSize(3); checkManifests( manifestFiles, @@ -303,9 +272,9 @@ public void testRollingManifestWriterSplitFiles() throws IOException { deletedRowCounts); } - @Test + @TestTemplate public void testRollingDeleteManifestWriterSplitFiles() throws IOException { - Assumptions.assumeThat(formatVersion).isGreaterThan(1); + assumeThat(formatVersion).isGreaterThan(1); RollingManifestWriter writer = newRollingWriteDeleteManifest(SMALL_FILE_SIZE); int[] addedFileCounts = new 
int[3]; @@ -334,7 +303,7 @@ public void testRollingDeleteManifestWriterSplitFiles() throws IOException { writer.close(); List manifestFiles = writer.toManifestFiles(); - Assertions.assertThat(manifestFiles.size()).isEqualTo(3); + assertThat(manifestFiles).hasSize(3); checkManifests( manifestFiles, @@ -347,7 +316,7 @@ public void testRollingDeleteManifestWriterSplitFiles() throws IOException { writer.close(); manifestFiles = writer.toManifestFiles(); - Assertions.assertThat(manifestFiles.size()).isEqualTo(3); + assertThat(manifestFiles).hasSize(3); checkManifests( manifestFiles, @@ -370,17 +339,17 @@ private void checkManifests( for (int i = 0; i < manifests.size(); i++) { ManifestFile manifest = manifests.get(i); - Assertions.assertThat(manifest.hasAddedFiles()).isTrue(); - Assertions.assertThat(manifest.addedFilesCount()).isEqualTo(addedFileCounts[i]); - Assertions.assertThat(manifest.addedRowsCount()).isEqualTo(addedRowCounts[i]); + assertThat(manifest.hasAddedFiles()).isTrue(); + assertThat(manifest.addedFilesCount()).isEqualTo(addedFileCounts[i]); + assertThat(manifest.addedRowsCount()).isEqualTo(addedRowCounts[i]); - Assertions.assertThat(manifest.hasExistingFiles()).isTrue(); - Assertions.assertThat(manifest.existingFilesCount()).isEqualTo(existingFileCounts[i]); - Assertions.assertThat(manifest.existingRowsCount()).isEqualTo(existingRowCounts[i]); + assertThat(manifest.hasExistingFiles()).isTrue(); + assertThat(manifest.existingFilesCount()).isEqualTo(existingFileCounts[i]); + assertThat(manifest.existingRowsCount()).isEqualTo(existingRowCounts[i]); - Assertions.assertThat(manifest.hasDeletedFiles()).isTrue(); - Assertions.assertThat(manifest.deletedFilesCount()).isEqualTo(deletedFileCounts[i]); - Assertions.assertThat(manifest.deletedRowsCount()).isEqualTo(deletedRowCounts[i]); + assertThat(manifest.hasDeletedFiles()).isTrue(); + assertThat(manifest.deletedFilesCount()).isEqualTo(deletedFileCounts[i]); + 
assertThat(manifest.deletedRowsCount()).isEqualTo(deletedRowCounts[i]); } } @@ -430,7 +399,9 @@ private RollingManifestWriter newRollingWriteDeleteManifest(long tar private OutputFile newManifestFile() { try { - return Files.localOutput(FileFormat.AVRO.addExtension(temp.newFile().toString())); + return Files.localOutput( + FileFormat.AVRO.addExtension( + File.createTempFile("manifest", null, temp.toFile()).toString())); } catch (IOException e) { throw new UncheckedIOException(e); } diff --git a/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java b/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java index 740791b255d5..1d5c34fa4b16 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java @@ -19,8 +19,12 @@ package org.apache.iceberg; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import java.io.File; import java.io.IOException; +import java.nio.file.Path; import java.util.List; import org.apache.iceberg.inmemory.InMemoryOutputFile; import org.apache.iceberg.io.CloseableIterable; @@ -33,11 +37,8 @@ import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Conversions; import org.apache.iceberg.types.Types; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestManifestWriterVersions { private static final FileIO FILE_IO = new TestTables.LocalFileIO(); @@ -97,7 +98,7 @@ public class TestManifestWriterVersions { null, null); - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; @Test public void testV1Write() throws 
IOException { @@ -112,7 +113,7 @@ public void testV1Write() throws IOException { @Test public void testV1WriteDelete() { - Assertions.assertThatThrownBy(() -> writeDeleteManifest(1)) + assertThatThrownBy(() -> writeDeleteManifest(1)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot write delete files in a v1 table"); } @@ -130,7 +131,7 @@ public void testV1WriteWithInheritance() throws IOException { public void testV2Write() throws IOException { ManifestFile manifest = writeManifest(2); checkManifest(manifest, ManifestWriter.UNASSIGNED_SEQ); - Assert.assertEquals("Content", ManifestContent.DATA, manifest.content()); + assertThat(manifest.content()).isEqualTo(ManifestContent.DATA); checkEntry( readManifest(manifest), ManifestWriter.UNASSIGNED_SEQ, @@ -142,7 +143,7 @@ public void testV2Write() throws IOException { public void testV2WriteWithInheritance() throws IOException { ManifestFile manifest = writeAndReadManifestList(writeManifest(2), 2); checkManifest(manifest, SEQUENCE_NUMBER); - Assert.assertEquals("Content", ManifestContent.DATA, manifest.content()); + assertThat(manifest.content()).isEqualTo(ManifestContent.DATA); // v2 should use the correct sequence number by inheriting it checkEntry(readManifest(manifest), SEQUENCE_NUMBER, SEQUENCE_NUMBER, FileContent.DATA); @@ -152,7 +153,7 @@ public void testV2WriteWithInheritance() throws IOException { public void testV2WriteDelete() throws IOException { ManifestFile manifest = writeDeleteManifest(2); checkManifest(manifest, ManifestWriter.UNASSIGNED_SEQ); - Assert.assertEquals("Content", ManifestContent.DELETES, manifest.content()); + assertThat(manifest.content()).isEqualTo(ManifestContent.DELETES); checkEntry( readDeleteManifest(manifest), ManifestWriter.UNASSIGNED_SEQ, @@ -164,7 +165,7 @@ public void testV2WriteDelete() throws IOException { public void testV2WriteDeleteWithInheritance() throws IOException { ManifestFile manifest = writeAndReadManifestList(writeDeleteManifest(2), 2); 
checkManifest(manifest, SEQUENCE_NUMBER); - Assert.assertEquals("Content", ManifestContent.DELETES, manifest.content()); + assertThat(manifest.content()).isEqualTo(ManifestContent.DELETES); // v2 should use the correct sequence number by inheriting it checkEntry( @@ -214,69 +215,65 @@ void checkEntry( Long expectedDataSequenceNumber, Long expectedFileSequenceNumber, FileContent content) { - Assert.assertEquals("Status", ManifestEntry.Status.ADDED, entry.status()); - Assert.assertEquals("Snapshot ID", (Long) SNAPSHOT_ID, entry.snapshotId()); - Assert.assertEquals( - "Data sequence number", expectedDataSequenceNumber, entry.dataSequenceNumber()); - Assert.assertEquals( - "File sequence number", expectedFileSequenceNumber, entry.fileSequenceNumber()); + assertThat(entry.status()).isEqualTo(ManifestEntry.Status.ADDED); + assertThat(entry.snapshotId()).isEqualTo(SNAPSHOT_ID); + assertThat(entry.dataSequenceNumber()).isEqualTo(expectedDataSequenceNumber); + assertThat(entry.fileSequenceNumber()).isEqualTo(expectedFileSequenceNumber); checkDataFile(entry.file(), content); } void checkRewrittenEntry( ManifestEntry entry, Long expectedSequenceNumber, FileContent content) { - Assert.assertEquals("Status", ManifestEntry.Status.EXISTING, entry.status()); - Assert.assertEquals("Snapshot ID", (Long) SNAPSHOT_ID, entry.snapshotId()); - Assert.assertEquals("Data sequence number", expectedSequenceNumber, entry.dataSequenceNumber()); + assertThat(entry.status()).isEqualTo(ManifestEntry.Status.EXISTING); + assertThat(entry.snapshotId()).isEqualTo(SNAPSHOT_ID); + assertThat(entry.dataSequenceNumber()).isEqualTo(expectedSequenceNumber); checkDataFile(entry.file(), content); } void checkDataFile(ContentFile dataFile, FileContent content) { // DataFile is the superclass of DeleteFile, so this method can check both - Assert.assertEquals("Content", content, dataFile.content()); - Assert.assertEquals("Path", PATH, dataFile.path()); - Assert.assertEquals("Format", FORMAT, dataFile.format()); 
- Assert.assertEquals("Partition", PARTITION, dataFile.partition()); - Assert.assertEquals("Record count", METRICS.recordCount(), (Long) dataFile.recordCount()); - Assert.assertEquals("Column sizes", METRICS.columnSizes(), dataFile.columnSizes()); - Assert.assertEquals("Value counts", METRICS.valueCounts(), dataFile.valueCounts()); - Assert.assertEquals("Null value counts", METRICS.nullValueCounts(), dataFile.nullValueCounts()); - Assert.assertEquals("NaN value counts", METRICS.nanValueCounts(), dataFile.nanValueCounts()); - Assert.assertEquals("Lower bounds", METRICS.lowerBounds(), dataFile.lowerBounds()); - Assert.assertEquals("Upper bounds", METRICS.upperBounds(), dataFile.upperBounds()); - Assert.assertEquals("Sort order id", SORT_ORDER_ID, dataFile.sortOrderId()); + assertThat(dataFile.content()).isEqualTo(content); + assertThat(dataFile.path()).isEqualTo(PATH); + assertThat(dataFile.format()).isEqualTo(FORMAT); + assertThat(dataFile.partition()).isEqualTo(PARTITION); + assertThat(dataFile.recordCount()).isEqualTo(METRICS.recordCount()); + assertThat(dataFile.columnSizes()).isEqualTo(METRICS.columnSizes()); + assertThat(dataFile.valueCounts()).isEqualTo(METRICS.valueCounts()); + assertThat(dataFile.nullValueCounts()).isEqualTo(METRICS.nullValueCounts()); + assertThat(dataFile.nanValueCounts()).isEqualTo(METRICS.nanValueCounts()); + assertThat(dataFile.lowerBounds()).isEqualTo(METRICS.lowerBounds()); + assertThat(dataFile.upperBounds()).isEqualTo(METRICS.upperBounds()); + assertThat(dataFile.sortOrderId()).isEqualTo(SORT_ORDER_ID); if (dataFile.content() == FileContent.EQUALITY_DELETES) { - Assert.assertEquals(EQUALITY_IDS, dataFile.equalityFieldIds()); + assertThat(dataFile.equalityFieldIds()).isEqualTo(EQUALITY_IDS); } else { - Assert.assertNull(dataFile.equalityFieldIds()); + assertThat(dataFile.equalityFieldIds()).isNull(); } } void checkManifest(ManifestFile manifest, long expectedSequenceNumber) { - Assert.assertEquals("Snapshot ID", (Long) SNAPSHOT_ID, 
manifest.snapshotId()); - Assert.assertEquals("Sequence number", expectedSequenceNumber, manifest.sequenceNumber()); - Assert.assertEquals( - "Min sequence number", expectedSequenceNumber, manifest.minSequenceNumber()); - Assert.assertEquals("Added files count", (Integer) 1, manifest.addedFilesCount()); - Assert.assertEquals("Existing files count", (Integer) 0, manifest.existingFilesCount()); - Assert.assertEquals("Deleted files count", (Integer) 0, manifest.deletedFilesCount()); - Assert.assertEquals("Added rows count", METRICS.recordCount(), manifest.addedRowsCount()); - Assert.assertEquals("Existing rows count", (Long) 0L, manifest.existingRowsCount()); - Assert.assertEquals("Deleted rows count", (Long) 0L, manifest.deletedRowsCount()); + assertThat(manifest.snapshotId()).isEqualTo(SNAPSHOT_ID); + assertThat(manifest.sequenceNumber()).isEqualTo(expectedSequenceNumber); + assertThat(manifest.minSequenceNumber()).isEqualTo(expectedSequenceNumber); + assertThat(manifest.addedFilesCount()).isEqualTo(1); + assertThat(manifest.existingFilesCount()).isEqualTo(0); + assertThat(manifest.deletedFilesCount()).isEqualTo(0); + assertThat(manifest.addedRowsCount()).isEqualTo(METRICS.recordCount()); + assertThat(manifest.existingRowsCount()).isEqualTo(0); + assertThat(manifest.deletedRowsCount()).isEqualTo(0); } void checkRewrittenManifest( ManifestFile manifest, long expectedSequenceNumber, long expectedMinSequenceNumber) { - Assert.assertEquals("Snapshot ID", (Long) SNAPSHOT_ID, manifest.snapshotId()); - Assert.assertEquals("Sequence number", expectedSequenceNumber, manifest.sequenceNumber()); - Assert.assertEquals( - "Min sequence number", expectedMinSequenceNumber, manifest.minSequenceNumber()); - Assert.assertEquals("Added files count", (Integer) 0, manifest.addedFilesCount()); - Assert.assertEquals("Existing files count", (Integer) 1, manifest.existingFilesCount()); - Assert.assertEquals("Deleted files count", (Integer) 0, manifest.deletedFilesCount()); - 
Assert.assertEquals("Added rows count", (Long) 0L, manifest.addedRowsCount()); - Assert.assertEquals("Existing rows count", METRICS.recordCount(), manifest.existingRowsCount()); - Assert.assertEquals("Deleted rows count", (Long) 0L, manifest.deletedRowsCount()); + assertThat(manifest.snapshotId()).isEqualTo(SNAPSHOT_ID); + assertThat(manifest.sequenceNumber()).isEqualTo(expectedSequenceNumber); + assertThat(manifest.minSequenceNumber()).isEqualTo(expectedMinSequenceNumber); + assertThat(manifest.addedFilesCount()).isEqualTo(0); + assertThat(manifest.existingFilesCount()).isEqualTo(1); + assertThat(manifest.deletedFilesCount()).isEqualTo(0); + assertThat(manifest.addedRowsCount()).isEqualTo(0); + assertThat(manifest.existingRowsCount()).isEqualTo(METRICS.recordCount()); + assertThat(manifest.deletedRowsCount()).isEqualTo(0); } private InputFile writeManifestList(ManifestFile manifest, int formatVersion) throws IOException { @@ -296,14 +293,16 @@ private InputFile writeManifestList(ManifestFile manifest, int formatVersion) th private ManifestFile writeAndReadManifestList(ManifestFile manifest, int formatVersion) throws IOException { List manifests = ManifestLists.read(writeManifestList(manifest, formatVersion)); - Assert.assertEquals("Should contain one manifest", 1, manifests.size()); + assertThat(manifests).hasSize(1); return manifests.get(0); } private ManifestFile rewriteManifest(ManifestFile manifest, int formatVersion) throws IOException { OutputFile manifestFile = - Files.localOutput(FileFormat.AVRO.addExtension(temp.newFile().toString())); + Files.localOutput( + FileFormat.AVRO.addExtension( + File.createTempFile("manifest", null, temp.toFile()).toString())); ManifestWriter writer = ManifestFiles.write(formatVersion, SPEC, manifestFile, SNAPSHOT_ID); try { @@ -320,7 +319,9 @@ private ManifestFile writeManifest(int formatVersion) throws IOException { private ManifestFile writeManifest(DataFile file, int formatVersion) throws IOException { OutputFile 
manifestFile = - Files.localOutput(FileFormat.AVRO.addExtension(temp.newFile().toString())); + Files.localOutput( + FileFormat.AVRO.addExtension( + File.createTempFile("manifest", null, temp.toFile()).toString())); ManifestWriter writer = ManifestFiles.write(formatVersion, SPEC, manifestFile, SNAPSHOT_ID); try { @@ -335,14 +336,16 @@ private ManifestEntry readManifest(ManifestFile manifest) throws IOExc try (CloseableIterable> reader = ManifestFiles.read(manifest, FILE_IO).entries()) { List> files = Lists.newArrayList(reader); - Assert.assertEquals("Should contain only one data file", 1, files.size()); + assertThat(files).hasSize(1); return files.get(0); } } private ManifestFile writeDeleteManifest(int formatVersion) throws IOException { OutputFile manifestFile = - Files.localOutput(FileFormat.AVRO.addExtension(temp.newFile().toString())); + Files.localOutput( + FileFormat.AVRO.addExtension( + File.createTempFile("manifest", null, temp.toFile()).toString())); ManifestWriter writer = ManifestFiles.writeDeleteManifest(formatVersion, SPEC, manifestFile, SNAPSHOT_ID); try { @@ -357,7 +360,7 @@ private ManifestEntry readDeleteManifest(ManifestFile manifest) thro try (CloseableIterable> reader = ManifestFiles.readDeleteManifest(manifest, FILE_IO, null).entries()) { List> entries = Lists.newArrayList(reader); - Assert.assertEquals("Should contain only one data file", 1, entries.size()); + assertThat(entries).hasSize(1); return entries.get(0); } } From 0cdf62f8fd90f0480d46b2b74787e1db87042ea6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 11:02:52 +0100 Subject: [PATCH 09/23] Build: Bump spring-boot from 2.5.4 to 2.7.18 (#9985) Bumps `spring-boot` from 2.5.4 to 2.7.18. 
Updates `org.springframework.boot:spring-boot-starter-jetty` from 2.5.4 to 2.7.18 - [Release notes](https://github.com/spring-projects/spring-boot/releases) - [Commits](https://github.com/spring-projects/spring-boot/compare/v2.5.4...v2.7.18) Updates `org.springframework.boot:spring-boot-starter-web` from 2.5.4 to 2.7.18 - [Release notes](https://github.com/spring-projects/spring-boot/releases) - [Commits](https://github.com/spring-projects/spring-boot/compare/v2.5.4...v2.7.18) --- updated-dependencies: - dependency-name: org.springframework.boot:spring-boot-starter-jetty dependency-type: direct:production update-type: version-update:semver-minor - dependency-name: org.springframework.boot:spring-boot-starter-web dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- gradle/libs.versions.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 26d0a79e57b0..ec7e91afe2ea 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -80,7 +80,7 @@ snowflake-jdbc = "3.14.5" spark-hive33 = "3.3.4" spark-hive34 = "3.4.2" spark-hive35 = "3.5.0" -spring-boot = "2.5.4" +spring-boot = "2.7.18" spring-web = "5.3.30" sqlite-jdbc = "3.45.1.0" testcontainers = "1.19.5" From e687954579a487217cbc069eea3ffd0a8a75c580 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 11:53:19 +0100 Subject: [PATCH 10/23] Build: Bump org.springframework:spring-web from 5.3.30 to 5.3.33 (#9989) Bumps [org.springframework:spring-web](https://github.com/spring-projects/spring-framework) from 5.3.30 to 5.3.33. 
- [Release notes](https://github.com/spring-projects/spring-framework/releases) - [Commits](https://github.com/spring-projects/spring-framework/compare/v5.3.30...v5.3.33) --- updated-dependencies: - dependency-name: org.springframework:spring-web dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- gradle/libs.versions.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index ec7e91afe2ea..17693a69d27b 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -81,7 +81,7 @@ spark-hive33 = "3.3.4" spark-hive34 = "3.4.2" spark-hive35 = "3.5.0" spring-boot = "2.7.18" -spring-web = "5.3.30" +spring-web = "5.3.33" sqlite-jdbc = "3.45.1.0" testcontainers = "1.19.5" tez010 = "0.10.3" From a3f8879815138a6e815115578386a1c5f106d807 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 11:53:56 +0100 Subject: [PATCH 11/23] Build: Bump jetty from 9.4.53.v20231009 to 9.4.54.v20240208 (#9982) Bumps `jetty` from 9.4.53.v20231009 to 9.4.54.v20240208. Updates `org.eclipse.jetty:jetty-server` from 9.4.53.v20231009 to 9.4.54.v20240208 Updates `org.eclipse.jetty:jetty-servlet` from 9.4.53.v20231009 to 9.4.54.v20240208 --- updated-dependencies: - dependency-name: org.eclipse.jetty:jetty-server dependency-type: direct:production update-type: version-update:semver-patch - dependency-name: org.eclipse.jetty:jetty-servlet dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- gradle/libs.versions.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 17693a69d27b..d1a26280ca37 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -58,7 +58,7 @@ jackson215 = { strictly = "2.15.2"} jakarta-el-api = "3.0.3" jaxb-api = "2.3.1" jaxb-runtime = "2.3.3" -jetty = "9.4.53.v20231009" +jetty = "9.4.54.v20240208" junit = "5.10.1" kafka = "3.6.1" kryo-shaded = "4.0.3" From 7a6143abbb458ae17fea4cad12c2aa86d74814b0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 14:13:57 +0100 Subject: [PATCH 12/23] Build: Bump guava from 33.0.0-jre to 33.1.0-jre (#9977) Bumps `guava` from 33.0.0-jre to 33.1.0-jre. Updates `com.google.guava:guava` from 33.0.0-jre to 33.1.0-jre - [Release notes](https://github.com/google/guava/releases) - [Commits](https://github.com/google/guava/commits) Updates `com.google.guava:guava-testlib` from 33.0.0-jre to 33.1.0-jre - [Release notes](https://github.com/google/guava/releases) - [Commits](https://github.com/google/guava/commits) --- updated-dependencies: - dependency-name: com.google.guava:guava dependency-type: direct:production update-type: version-update:semver-minor - dependency-name: com.google.guava:guava-testlib dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- gradle/libs.versions.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index d1a26280ca37..34e9fc427ca9 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -42,7 +42,7 @@ flink116 = { strictly = "1.16.3"} flink117 = { strictly = "1.17.2"} flink118 = { strictly = "1.18.1"} google-libraries-bom = "26.28.0" -guava = "33.0.0-jre" +guava = "33.1.0-jre" hadoop2 = "2.7.3" hadoop3-client = "3.3.6" httpcomponents-httpclient5 = "5.3.1" From f614a3f8b0379188efcc0c38caa486c94c6a52a3 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 18 Mar 2024 16:42:14 +0100 Subject: [PATCH 13/23] API: Fix `TestStrictMetricsEvaluator` assertion message (#9992) All values (`5` and `6`) are not between the upper and lower bound of `[30, 79]`. --- .../apache/iceberg/expressions/TestStrictMetricsEvaluator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestStrictMetricsEvaluator.java b/api/src/test/java/org/apache/iceberg/expressions/TestStrictMetricsEvaluator.java index 2266389abab0..82aaff02149a 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestStrictMetricsEvaluator.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestStrictMetricsEvaluator.java @@ -592,7 +592,7 @@ public void testIntegerNotIn() { boolean shouldRead = new StrictMetricsEvaluator(SCHEMA, notIn("id", INT_MIN_VALUE - 25, INT_MIN_VALUE - 24)) .eval(FILE); - assertThat(shouldRead).as("Should not match: all values !=5 and !=6").isTrue(); + assertThat(shouldRead).as("Should match: all values !=5 and !=6").isTrue(); shouldRead = new StrictMetricsEvaluator(SCHEMA, notIn("id", INT_MIN_VALUE - 1, INT_MIN_VALUE)) From 353e55e24fa751ec877c597b9647bcd75bccbf51 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 19 Mar 2024 08:58:10 +0100 Subject: [PATCH 14/23] Build: Bump arrow from 15.0.0 to 15.0.1 (#9910) Bumps `arrow` from 15.0.0 to 15.0.1. Updates `org.apache.arrow:arrow-memory-netty` from 15.0.0 to 15.0.1 Updates `org.apache.arrow:arrow-vector` from 15.0.0 to 15.0.1 - [Commits](https://github.com/apache/arrow/compare/go/v15.0.0...go/v15.0.1) --- updated-dependencies: - dependency-name: org.apache.arrow:arrow-memory-netty dependency-type: direct:production update-type: version-update:semver-patch - dependency-name: org.apache.arrow:arrow-vector dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- gradle/libs.versions.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 34e9fc427ca9..ce571331529f 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -24,7 +24,7 @@ activation = "1.1.1" aliyun-sdk-oss = "3.10.2" antlr = "4.9.3" aircompressor = "0.26" -arrow = "15.0.0" +arrow = "15.0.1" avro = "1.11.3" assertj-core = "3.25.3" awaitility = "4.2.1" From f425dc7401b268f92778c3c9b2c6abfc7fb661df Mon Sep 17 00:00:00 2001 From: Eduard Tudenhoefner Date: Tue, 19 Mar 2024 17:05:41 +0100 Subject: [PATCH 15/23] AWS, Core: Replace .withFailMessage() usage with .as() (#10000) We almost never want to use `.withFailMessage()` as that will override any enriched contextual information that AssertJ generally provides about actual/expected. It's better to use `.as()` to add some description to the check being done, which allows to still show contextual information about actual/expected if the assertion ever fails. 
--- .../iceberg/aws/AwsClientPropertiesTest.java | 33 +++++++++---------- .../iceberg/aws/HttpClientPropertiesTest.java | 12 +++---- .../aws/TestS3FileIOAwsClientFactories.java | 4 +-- .../aws/s3/TestS3FileIOProperties.java | 6 ++-- .../apache/iceberg/catalog/CatalogTests.java | 22 ++++++------- 5 files changed, 36 insertions(+), 41 deletions(-) diff --git a/aws/src/test/java/org/apache/iceberg/aws/AwsClientPropertiesTest.java b/aws/src/test/java/org/apache/iceberg/aws/AwsClientPropertiesTest.java index 7f57fbcb2d10..fd3bea68088b 100644 --- a/aws/src/test/java/org/apache/iceberg/aws/AwsClientPropertiesTest.java +++ b/aws/src/test/java/org/apache/iceberg/aws/AwsClientPropertiesTest.java @@ -46,7 +46,7 @@ public void testApplyClientRegion() { Mockito.verify(mockS3ClientBuilder).region(regionArgumentCaptor.capture()); Region region = regionArgumentCaptor.getValue(); Assertions.assertThat(region.id()) - .withFailMessage("region parameter should match what is set in CLIENT_REGION") + .as("region parameter should match what is set in CLIENT_REGION") .isEqualTo("us-east-1"); } @@ -56,9 +56,9 @@ public void testDefaultCredentialsConfiguration() { AwsCredentialsProvider credentialsProvider = awsClientProperties.credentialsProvider(null, null, null); - Assertions.assertThat(credentialsProvider instanceof DefaultCredentialsProvider) - .withFailMessage("Should use default credentials if nothing is set") - .isTrue(); + Assertions.assertThat(credentialsProvider) + .as("Should use default credentials if nothing is set") + .isInstanceOf(DefaultCredentialsProvider.class); } @Test @@ -70,7 +70,7 @@ public void testCreatesNewInstanceOfDefaultCredentialsConfiguration() { awsClientProperties.credentialsProvider(null, null, null); Assertions.assertThat(credentialsProvider) - .withFailMessage("Should create a new instance in each call") + .as("Should create a new instance in each call") .isNotSameAs(credentialsProvider2); } @@ -81,17 +81,15 @@ public void 
testBasicCredentialsConfiguration() { AwsCredentialsProvider credentialsProvider = awsClientProperties.credentialsProvider("key", "secret", null); - Assertions.assertThat(credentialsProvider.resolveCredentials() instanceof AwsBasicCredentials) - .withFailMessage( - "Should use basic credentials if access key ID and secret access key are set") - .isTrue(); + Assertions.assertThat(credentialsProvider.resolveCredentials()) + .as("Should use basic credentials if access key ID and secret access key are set") + .isInstanceOf(AwsBasicCredentials.class); Assertions.assertThat(credentialsProvider.resolveCredentials().accessKeyId()) - .withFailMessage("The access key id should be the same as the one set by tag ACCESS_KEY_ID") + .as("The access key id should be the same as the one set by tag ACCESS_KEY_ID") .isEqualTo("key"); Assertions.assertThat(credentialsProvider.resolveCredentials().secretAccessKey()) - .withFailMessage( - "The secret access key should be the same as the one set by tag SECRET_ACCESS_KEY") + .as("The secret access key should be the same as the one set by tag SECRET_ACCESS_KEY") .isEqualTo("secret"); } @@ -102,15 +100,14 @@ public void testSessionCredentialsConfiguration() { AwsCredentialsProvider credentialsProvider = awsClientProperties.credentialsProvider("key", "secret", "token"); - Assertions.assertThat(credentialsProvider.resolveCredentials() instanceof AwsSessionCredentials) - .withFailMessage("Should use session credentials if session token is set") - .isTrue(); + Assertions.assertThat(credentialsProvider.resolveCredentials()) + .as("Should use session credentials if session token is set") + .isInstanceOf(AwsSessionCredentials.class); Assertions.assertThat(credentialsProvider.resolveCredentials().accessKeyId()) - .withFailMessage("The access key id should be the same as the one set by tag ACCESS_KEY_ID") + .as("The access key id should be the same as the one set by tag ACCESS_KEY_ID") .isEqualTo("key"); 
Assertions.assertThat(credentialsProvider.resolveCredentials().secretAccessKey()) - .withFailMessage( - "The secret access key should be the same as the one set by tag SECRET_ACCESS_KEY") + .as("The secret access key should be the same as the one set by tag SECRET_ACCESS_KEY") .isEqualTo("secret"); } } diff --git a/aws/src/test/java/org/apache/iceberg/aws/HttpClientPropertiesTest.java b/aws/src/test/java/org/apache/iceberg/aws/HttpClientPropertiesTest.java index 270c4c1dc090..7c6e5c19256d 100644 --- a/aws/src/test/java/org/apache/iceberg/aws/HttpClientPropertiesTest.java +++ b/aws/src/test/java/org/apache/iceberg/aws/HttpClientPropertiesTest.java @@ -45,9 +45,9 @@ public void testUrlHttpClientConfiguration() { Mockito.verify(mockS3ClientBuilder).httpClientBuilder(httpClientBuilderCaptor.capture()); SdkHttpClient.Builder capturedHttpClientBuilder = httpClientBuilderCaptor.getValue(); - Assertions.assertThat(capturedHttpClientBuilder instanceof UrlConnectionHttpClient.Builder) - .withFailMessage("Should use url connection http client") - .isTrue(); + Assertions.assertThat(capturedHttpClientBuilder) + .as("Should use url connection http client") + .isInstanceOf(UrlConnectionHttpClient.Builder.class); } @Test @@ -62,9 +62,9 @@ public void testApacheHttpClientConfiguration() { httpClientProperties.applyHttpClientConfigurations(mockS3ClientBuilder); Mockito.verify(mockS3ClientBuilder).httpClientBuilder(httpClientBuilderCaptor.capture()); SdkHttpClient.Builder capturedHttpClientBuilder = httpClientBuilderCaptor.getValue(); - Assertions.assertThat(capturedHttpClientBuilder instanceof ApacheHttpClient.Builder) - .withFailMessage("Should use apache http client") - .isTrue(); + Assertions.assertThat(capturedHttpClientBuilder) + .as("Should use apache http client") + .isInstanceOf(ApacheHttpClient.Builder.class); } @Test diff --git a/aws/src/test/java/org/apache/iceberg/aws/TestS3FileIOAwsClientFactories.java 
b/aws/src/test/java/org/apache/iceberg/aws/TestS3FileIOAwsClientFactories.java index f0bf9f7067e3..f09d4e543f17 100644 --- a/aws/src/test/java/org/apache/iceberg/aws/TestS3FileIOAwsClientFactories.java +++ b/aws/src/test/java/org/apache/iceberg/aws/TestS3FileIOAwsClientFactories.java @@ -35,7 +35,7 @@ public void testS3FileIOImplCatalogPropertyDefined() { "org.apache.iceberg.aws.s3.DefaultS3FileIOAwsClientFactory"); Object factoryImpl = S3FileIOAwsClientFactories.initialize(properties); Assertions.assertThat(factoryImpl) - .withFailMessage( + .as( "should instantiate an object of type S3FileIOAwsClientFactory when s3.client-factory-impl is set") .isInstanceOf(S3FileIOAwsClientFactory.class); } @@ -46,7 +46,7 @@ public void testS3FileIOImplCatalogPropertyNotDefined() { Map properties = Maps.newHashMap(); Object factoryImpl = S3FileIOAwsClientFactories.initialize(properties); Assertions.assertThat(factoryImpl) - .withFailMessage( + .as( "should instantiate an object of type AwsClientFactory when s3.client-factory-impl is not set") .isInstanceOf(AwsClientFactory.class); } diff --git a/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java b/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java index 8dda5e16338e..2ed8a9471d66 100644 --- a/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java +++ b/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java @@ -447,13 +447,13 @@ public void testApplyS3ServiceConfigurations() { S3Configuration s3Configuration = s3ConfigurationCaptor.getValue(); Assertions.assertThat(s3Configuration.pathStyleAccessEnabled()) - .withFailMessage("s3 path style access enabled parameter should be set to true") + .as("s3 path style access enabled parameter should be set to true") .isTrue(); Assertions.assertThat(s3Configuration.useArnRegionEnabled()) - .withFailMessage("s3 use arn region enabled parameter should be set to true") + .as("s3 use arn region enabled parameter should 
be set to true") .isTrue(); Assertions.assertThat(s3Configuration.accelerateModeEnabled()) - .withFailMessage("s3 acceleration mode enabled parameter should be set to true") + .as("s3 acceleration mode enabled parameter should be set to false") .isFalse(); } diff --git a/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java b/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java index d6b9f6d12074..6904449cc4c3 100644 --- a/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java +++ b/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java @@ -380,17 +380,17 @@ public void testListNamespaces() { catalog.createNamespace(ns1); Assertions.assertThat(catalog.listNamespaces()) - .withFailMessage("Should include newdb_1") + .as("Should include newdb_1") .hasSameElementsAs(concat(starting, ns1)); catalog.createNamespace(ns2); Assertions.assertThat(catalog.listNamespaces()) - .withFailMessage("Should include newdb_1 and newdb_2") + .as("Should include newdb_1 and newdb_2") .hasSameElementsAs(concat(starting, ns1, ns2)); catalog.dropNamespace(ns1); Assertions.assertThat(catalog.listNamespaces()) - .withFailMessage("Should include newdb_2, not newdb_1") + .as("Should include newdb_2, not newdb_1") .hasSameElementsAs(concat(starting, ns2)); catalog.dropNamespace(ns2); @@ -415,36 +415,34 @@ public void testListNestedNamespaces() { catalog.createNamespace(parent); Assertions.assertThat(catalog.listNamespaces()) - .withFailMessage("Should include parent") + .as("Should include parent") .hasSameElementsAs(concat(starting, parent)); Assertions.assertThat(catalog.listNamespaces(parent)) - .withFailMessage("Should have no children in newly created parent namespace") + .as("Should have no children in newly created parent namespace") .isEmpty(); catalog.createNamespace(child1); Assertions.assertThat(catalog.listNamespaces(parent)) - .withFailMessage("Should include child1") + .as("Should include child1") .hasSameElementsAs(ImmutableList.of(child1)); 
catalog.createNamespace(child2); Assertions.assertThat(catalog.listNamespaces(parent)) - .withFailMessage("Should include child1 and child2") + .as("Should include child1 and child2") .hasSameElementsAs(ImmutableList.of(child1, child2)); Assertions.assertThat(catalog.listNamespaces()) - .withFailMessage("Should not change listing the root") + .as("Should not change listing the root") .hasSameElementsAs(concat(starting, parent)); catalog.dropNamespace(child1); Assertions.assertThat(catalog.listNamespaces(parent)) - .withFailMessage("Should include only child2") + .as("Should include only child2") .hasSameElementsAs(ImmutableList.of(child2)); catalog.dropNamespace(child2); - Assertions.assertThat(catalog.listNamespaces(parent)) - .withFailMessage("Should be empty") - .isEmpty(); + Assertions.assertThat(catalog.listNamespaces(parent)).as("Should be empty").isEmpty(); } @Test From fae0f8140b4f0b1dee20679f98e8be7e9b93a373 Mon Sep 17 00:00:00 2001 From: Tom Tanaka <43331405+tomtongue@users.noreply.github.com> Date: Wed, 20 Mar 2024 16:06:38 +0900 Subject: [PATCH 16/23] Core: Migrate tests to JUnit5 (#9999) --- .../apache/iceberg/TestCommitReporting.java | 19 +- .../apache/iceberg/TestCreateTransaction.java | 330 +++++-------- .../apache/iceberg/TestPartitionSpecInfo.java | 94 ++-- .../iceberg/TestPartitionSpecParser.java | 35 +- .../org/apache/iceberg/TestPartitioning.java | 94 ++-- .../iceberg/TestReplaceTransaction.java | 255 ++++------ .../iceberg/TestSetPartitionStatistics.java | 64 +-- .../org/apache/iceberg/TestSetStatistics.java | 78 ++- .../org/apache/iceberg/TestTransaction.java | 462 +++++++----------- 9 files changed, 594 insertions(+), 837 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/TestCommitReporting.java b/core/src/test/java/org/apache/iceberg/TestCommitReporting.java index 08c4ac33d6fd..48b9c8d72bd2 100644 --- a/core/src/test/java/org/apache/iceberg/TestCommitReporting.java +++ 
b/core/src/test/java/org/apache/iceberg/TestCommitReporting.java @@ -21,21 +21,26 @@ import static org.assertj.core.api.Assertions.assertThat; import java.io.IOException; +import java.util.Arrays; +import java.util.List; import org.apache.iceberg.ScanPlanningAndReportingTestBase.TestMetricsReporter; import org.apache.iceberg.metrics.CommitMetricsResult; import org.apache.iceberg.metrics.CommitReport; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; -import org.junit.Test; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; -public class TestCommitReporting extends TableTestBase { +@ExtendWith(ParameterizedTestExtension.class) +public class TestCommitReporting extends TestBase { private final TestMetricsReporter reporter = new TestMetricsReporter(); - public TestCommitReporting() { - super(2); + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(2); } - @Test + @TestTemplate public void addAndDeleteDataFiles() { String tableName = "add-and-delete-data-files"; Table table = @@ -80,7 +85,7 @@ public void addAndDeleteDataFiles() { assertThat(metrics.totalFilesSizeInBytes().value()).isEqualTo(0L); } - @Test + @TestTemplate public void addAndDeleteDeleteFiles() { String tableName = "add-and-delete-delete-files"; Table table = @@ -150,7 +155,7 @@ public void addAndDeleteDeleteFiles() { assertThat(metrics.totalFilesSizeInBytes().value()).isEqualTo(0L); } - @Test + @TestTemplate public void addAndDeleteManifests() throws IOException { String tableName = "add-and-delete-manifests"; Table table = diff --git a/core/src/test/java/org/apache/iceberg/TestCreateTransaction.java b/core/src/test/java/org/apache/iceberg/TestCreateTransaction.java index 4240184e913c..0c6b50b37792 100644 --- a/core/src/test/java/org/apache/iceberg/TestCreateTransaction.java +++ b/core/src/test/java/org/apache/iceberg/TestCreateTransaction.java @@ -20,70 +20,61 @@ import static 
org.apache.iceberg.PartitionSpec.unpartitioned; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.File; import java.io.IOException; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.List; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class TestCreateTransaction extends TableTestBase { - @Parameterized.Parameters(name = "formatVersion = {0}") - public static Object[] parameters() { - return new Object[] {1, 2}; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(ParameterizedTestExtension.class) +public class TestCreateTransaction extends TestBase { + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(1, 2); } - public TestCreateTransaction(int formatVersion) { - super(formatVersion); - } - - @Test + @TestTemplate public void testCreateTransaction() throws IOException { - File tableDir = temp.newFolder(); - Assert.assertTrue(tableDir.delete()); + File tableDir = Files.createTempDirectory(temp, "junit").toFile(); + assertThat(tableDir.delete()).isTrue(); Transaction txn = TestTables.beginCreate(tableDir, "test_create", SCHEMA, unpartitioned()); - Assert.assertNull( - "Starting a create transaction should not commit metadata", - TestTables.readMetadata("test_create")); - Assert.assertNull("Should have no metadata version", 
TestTables.metadataVersion("test_create")); + assertThat(TestTables.readMetadata("test_create")).isNull(); + assertThat(TestTables.metadataVersion("test_create")).isNull(); txn.commitTransaction(); TableMetadata meta = TestTables.readMetadata("test_create"); - Assert.assertNotNull("Table metadata should be created after transaction commits", meta); - Assert.assertEquals( - "Should have metadata version 0", 0, (int) TestTables.metadataVersion("test_create")); - Assert.assertEquals("Should have 0 manifest files", 0, listManifestFiles(tableDir).size()); - - Assert.assertEquals( - "Table schema should match with reassigned IDs", - TypeUtil.assignIncreasingFreshIds(SCHEMA).asStruct(), - meta.schema().asStruct()); - Assert.assertEquals("Table spec should match", unpartitioned(), meta.spec()); - Assert.assertEquals("Table should not have any snapshots", 0, meta.snapshots().size()); + assertThat(meta).isNotNull(); + assertThat(TestTables.metadataVersion("test_create")).isEqualTo(0); + assertThat(listManifestFiles(tableDir)).isEmpty(); + + assertThat(meta.schema().asStruct()) + .isEqualTo(TypeUtil.assignIncreasingFreshIds(SCHEMA).asStruct()); + assertThat(meta.spec()).isEqualTo(unpartitioned()); + assertThat(meta.snapshots()).isEmpty(); } - @Test + @TestTemplate public void testCreateTransactionAndUpdateSchema() throws IOException { - File tableDir = temp.newFolder(); - Assert.assertTrue(tableDir.delete()); + File tableDir = Files.createTempDirectory(temp, "junit").toFile(); + assertThat(tableDir.delete()).isTrue(); Transaction txn = TestTables.beginCreate(tableDir, "test_create", SCHEMA, unpartitioned()); - Assert.assertNull( - "Starting a create transaction should not commit metadata", - TestTables.readMetadata("test_create")); - Assert.assertNull("Should have no metadata version", TestTables.metadataVersion("test_create")); + assertThat(TestTables.readMetadata("test_create")).isNull(); + assertThat(TestTables.metadataVersion("test_create")).isNull(); txn.updateSchema() 
.allowIncompatibleChanges() @@ -94,10 +85,9 @@ public void testCreateTransactionAndUpdateSchema() throws IOException { txn.commitTransaction(); TableMetadata meta = TestTables.readMetadata("test_create"); - Assert.assertNotNull("Table metadata should be created after transaction commits", meta); - Assert.assertEquals( - "Should have metadata version 0", 0, (int) TestTables.metadataVersion("test_create")); - Assert.assertEquals("Should have 0 manifest files", 0, listManifestFiles(tableDir).size()); + assertThat(meta).isNotNull(); + assertThat(TestTables.metadataVersion("test_create")).isEqualTo(0); + assertThat(listManifestFiles(tableDir)).isEmpty(); Schema resultSchema = new Schema( @@ -107,263 +97,195 @@ public void testCreateTransactionAndUpdateSchema() throws IOException { required(3, "col", Types.StringType.get())), Sets.newHashSet(1, 3)); - Assert.assertEquals( - "Table schema should match with reassigned IDs", - resultSchema.asStruct(), - meta.schema().asStruct()); - Assert.assertEquals( - "Table schema identifier should match", - resultSchema.identifierFieldIds(), - meta.schema().identifierFieldIds()); - Assert.assertEquals("Table spec should match", unpartitioned(), meta.spec()); - Assert.assertEquals("Table should not have any snapshots", 0, meta.snapshots().size()); + assertThat(meta.schema().asStruct()).isEqualTo(resultSchema.asStruct()); + assertThat(meta.schema().identifierFieldIds()).isEqualTo(resultSchema.identifierFieldIds()); + assertThat(meta.spec()).isEqualTo(unpartitioned()); + assertThat(meta.snapshots()).isEmpty(); } - @Test + @TestTemplate public void testCreateAndAppendWithTransaction() throws IOException { - File tableDir = temp.newFolder(); - Assert.assertTrue(tableDir.delete()); + File tableDir = Files.createTempDirectory(temp, "junit").toFile(); + assertThat(tableDir.delete()).isTrue(); Transaction txn = TestTables.beginCreate(tableDir, "test_append", SCHEMA, unpartitioned()); - Assert.assertNull( - "Starting a create transaction should 
not commit metadata", - TestTables.readMetadata("test_append")); - Assert.assertNull("Should have no metadata version", TestTables.metadataVersion("test_append")); + assertThat(TestTables.readMetadata("test_append")).isNull(); txn.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); - Assert.assertNull( - "Appending in a transaction should not commit metadata", - TestTables.readMetadata("test_append")); - Assert.assertNull("Should have no metadata version", TestTables.metadataVersion("test_append")); + assertThat(TestTables.readMetadata("test_append")).isNull(); + assertThat(TestTables.metadataVersion("test_append")).isNull(); txn.commitTransaction(); TableMetadata meta = TestTables.readMetadata("test_append"); - Assert.assertNotNull("Table metadata should be created after transaction commits", meta); - Assert.assertEquals( - "Should have metadata version 0", 0, (int) TestTables.metadataVersion("test_append")); - Assert.assertEquals("Should have 1 manifest file", 1, listManifestFiles(tableDir).size()); - - Assert.assertEquals( - "Table schema should match with reassigned IDs", - TypeUtil.assignIncreasingFreshIds(SCHEMA).asStruct(), - meta.schema().asStruct()); - Assert.assertEquals("Table spec should match", unpartitioned(), meta.spec()); - Assert.assertEquals("Table should have one snapshot", 1, meta.snapshots().size()); + assertThat(meta).isNotNull(); + assertThat(TestTables.metadataVersion("test_append")).isEqualTo(0); + assertThat(listManifestFiles(tableDir)).hasSize(1); + + assertThat(meta.schema().asStruct()) + .isEqualTo(TypeUtil.assignIncreasingFreshIds(SCHEMA).asStruct()); + assertThat(meta.spec()).isEqualTo(unpartitioned()); + assertThat(meta.snapshots()).hasSize(1); validateSnapshot(null, meta.currentSnapshot(), FILE_A, FILE_B); } - @Test + @TestTemplate public void testCreateAndAppendWithTable() throws IOException { - File tableDir = temp.newFolder(); - Assert.assertTrue(tableDir.delete()); + File tableDir = Files.createTempDirectory(temp, 
"junit").toFile(); + assertThat(tableDir.delete()).isTrue(); Transaction txn = TestTables.beginCreate(tableDir, "test_append", SCHEMA, unpartitioned()); - Assert.assertNull( - "Starting a create transaction should not commit metadata", - TestTables.readMetadata("test_append")); - Assert.assertNull("Should have no metadata version", TestTables.metadataVersion("test_append")); + assertThat(TestTables.readMetadata("test_append")) + .isEqualTo(TestTables.readMetadata("test_append")); + assertThat(TestTables.metadataVersion("test_append")).isNull(); - Assert.assertTrue( - "Should return a transaction table", - txn.table() instanceof BaseTransaction.TransactionTable); + assertThat(txn.table()).isInstanceOf(BaseTransaction.TransactionTable.class); txn.table().newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); - Assert.assertNull( - "Appending in a transaction should not commit metadata", - TestTables.readMetadata("test_append")); - Assert.assertNull("Should have no metadata version", TestTables.metadataVersion("test_append")); + assertThat(TestTables.readMetadata("test_append")).isNull(); + assertThat(TestTables.metadataVersion("test_append")).isNull(); txn.commitTransaction(); TableMetadata meta = TestTables.readMetadata("test_append"); - Assert.assertNotNull("Table metadata should be created after transaction commits", meta); - Assert.assertEquals( - "Should have metadata version 0", 0, (int) TestTables.metadataVersion("test_append")); - Assert.assertEquals("Should have 1 manifest file", 1, listManifestFiles(tableDir).size()); - - Assert.assertEquals( - "Table schema should match with reassigned IDs", - TypeUtil.assignIncreasingFreshIds(SCHEMA).asStruct(), - meta.schema().asStruct()); - Assert.assertEquals("Table spec should match", unpartitioned(), meta.spec()); - Assert.assertEquals("Table should have one snapshot", 1, meta.snapshots().size()); + assertThat(meta).isNotNull(); + assertThat(TestTables.metadataVersion("test_append")).isEqualTo(0); + 
assertThat(listManifestFiles(tableDir)).hasSize(1); + + assertThat(meta.schema().asStruct()) + .isEqualTo(TypeUtil.assignIncreasingFreshIds(SCHEMA).asStruct()); + assertThat(meta.spec()).isEqualTo(unpartitioned()); + assertThat(meta.snapshots()).hasSize(1); validateSnapshot(null, meta.currentSnapshot(), FILE_A, FILE_B); } - @Test + @TestTemplate public void testCreateAndUpdatePropertiesWithTransaction() throws IOException { - File tableDir = temp.newFolder(); - Assert.assertTrue(tableDir.delete()); + File tableDir = Files.createTempDirectory(temp, "junit").toFile(); + assertThat(tableDir.delete()).isTrue(); Transaction txn = TestTables.beginCreate(tableDir, "test_properties", SCHEMA, unpartitioned()); - Assert.assertNull( - "Starting a create transaction should not commit metadata", - TestTables.readMetadata("test_properties")); - Assert.assertNull( - "Should have no metadata version", TestTables.metadataVersion("test_properties")); + assertThat(TestTables.readMetadata("test_properties")).isNull(); + assertThat(TestTables.metadataVersion("test_properties")).isNull(); txn.updateProperties().set("test-property", "test-value").commit(); - Assert.assertNull( - "Adding properties in a transaction should not commit metadata", - TestTables.readMetadata("test_properties")); - Assert.assertNull( - "Should have no metadata version", TestTables.metadataVersion("test_properties")); + assertThat(TestTables.readMetadata("test_properties")).isNull(); + assertThat(TestTables.metadataVersion("test_properties")).isNull(); txn.commitTransaction(); TableMetadata meta = TestTables.readMetadata("test_properties"); - Assert.assertNotNull("Table metadata should be created after transaction commits", meta); - Assert.assertEquals( - "Should have metadata version 0", 0, (int) TestTables.metadataVersion("test_properties")); - Assert.assertEquals("Should have 0 manifest files", 0, listManifestFiles(tableDir).size()); - - Assert.assertEquals( - "Table schema should match with reassigned IDs", - 
TypeUtil.assignIncreasingFreshIds(SCHEMA).asStruct(), - meta.schema().asStruct()); - Assert.assertEquals("Table spec should match", unpartitioned(), meta.spec()); - Assert.assertEquals("Table should not have any snapshots", 0, meta.snapshots().size()); - Assert.assertEquals("Should have one table property", 1, meta.properties().size()); - Assert.assertEquals( - "Should have correct table property value", - "test-value", - meta.properties().get("test-property")); + assertThat(meta).isNotNull(); + assertThat(TestTables.metadataVersion("test_properties")).isEqualTo(0); + assertThat(listManifestFiles(tableDir)).isEmpty(); + + assertThat(meta.schema().asStruct()) + .isEqualTo(TypeUtil.assignIncreasingFreshIds(SCHEMA).asStruct()); + assertThat(meta.spec()).isEqualTo(unpartitioned()); + assertThat(meta.snapshots()).isEmpty(); + assertThat(meta.properties()).hasSize(1).containsEntry("test-property", "test-value"); } - @Test + @TestTemplate public void testCreateAndUpdatePropertiesWithTable() throws IOException { - File tableDir = temp.newFolder(); - Assert.assertTrue(tableDir.delete()); + File tableDir = Files.createTempDirectory(temp, "junit").toFile(); + assertThat(tableDir.delete()).isTrue(); Transaction txn = TestTables.beginCreate(tableDir, "test_properties", SCHEMA, unpartitioned()); - Assert.assertNull( - "Starting a create transaction should not commit metadata", - TestTables.readMetadata("test_properties")); - Assert.assertNull( - "Should have no metadata version", TestTables.metadataVersion("test_properties")); + assertThat(TestTables.readMetadata("test_properties")).isNull(); + assertThat(TestTables.metadataVersion("test_properties")).isNull(); - Assert.assertTrue( - "Should return a transaction table", - txn.table() instanceof BaseTransaction.TransactionTable); + assertThat(txn.table()).isInstanceOf(BaseTransaction.TransactionTable.class); txn.table().updateProperties().set("test-property", "test-value").commit(); - Assert.assertNull( - "Adding properties in a 
transaction should not commit metadata", - TestTables.readMetadata("test_properties")); - Assert.assertNull( - "Should have no metadata version", TestTables.metadataVersion("test_properties")); + assertThat(TestTables.readMetadata("test_properties")).isNull(); + assertThat(TestTables.metadataVersion("test_properties")).isNull(); txn.commitTransaction(); TableMetadata meta = TestTables.readMetadata("test_properties"); - Assert.assertNotNull("Table metadata should be created after transaction commits", meta); - Assert.assertEquals( - "Should have metadata version 0", 0, (int) TestTables.metadataVersion("test_properties")); - Assert.assertEquals("Should have 0 manifest files", 0, listManifestFiles(tableDir).size()); - - Assert.assertEquals( - "Table schema should match with reassigned IDs", - TypeUtil.assignIncreasingFreshIds(SCHEMA).asStruct(), - meta.schema().asStruct()); - Assert.assertEquals("Table spec should match", unpartitioned(), meta.spec()); - Assert.assertEquals("Table should not have any snapshots", 0, meta.snapshots().size()); - Assert.assertEquals("Should have one table property", 1, meta.properties().size()); - Assert.assertEquals( - "Should have correct table property value", - "test-value", - meta.properties().get("test-property")); + assertThat(meta).isNotNull(); + assertThat(TestTables.metadataVersion("test_properties")).isEqualTo(0); + assertThat(listManifestFiles(tableDir)).hasSize(0); + + assertThat(meta.schema().asStruct()) + .isEqualTo(TypeUtil.assignIncreasingFreshIds(SCHEMA).asStruct()); + assertThat(meta.spec()).isEqualTo(unpartitioned()); + assertThat(meta.snapshots()).isEmpty(); + assertThat(meta.properties()).hasSize(1).containsEntry("test-property", "test-value"); } - @Test + @TestTemplate public void testCreateDetectsUncommittedChange() throws IOException { - File tableDir = temp.newFolder(); - Assert.assertTrue(tableDir.delete()); + File tableDir = Files.createTempDirectory(temp, "junit").toFile(); + 
assertThat(tableDir.delete()).isTrue(); Transaction txn = TestTables.beginCreate(tableDir, "uncommitted_change", SCHEMA, unpartitioned()); - Assert.assertNull( - "Starting a create transaction should not commit metadata", - TestTables.readMetadata("uncommitted_change")); - Assert.assertNull( - "Should have no metadata version", TestTables.metadataVersion("uncommitted_change")); + assertThat(TestTables.readMetadata("uncommitted_change")).isNull(); + assertThat(TestTables.metadataVersion("uncommitted_change")).isNull(); txn.updateProperties().set("test-property", "test-value"); // not committed - Assertions.assertThatThrownBy(txn::newDelete) + assertThatThrownBy(txn::newDelete) .isInstanceOf(IllegalStateException.class) .hasMessage("Cannot create new DeleteFiles: last operation has not committed"); } - @Test + @TestTemplate public void testCreateDetectsUncommittedChangeOnCommit() throws IOException { - File tableDir = temp.newFolder(); - Assert.assertTrue(tableDir.delete()); + File tableDir = Files.createTempDirectory(temp, "junit").toFile(); + assertThat(tableDir.delete()).isTrue(); Transaction txn = TestTables.beginCreate(tableDir, "uncommitted_change", SCHEMA, unpartitioned()); - Assert.assertNull( - "Starting a create transaction should not commit metadata", - TestTables.readMetadata("uncommitted_change")); - Assert.assertNull( - "Should have no metadata version", TestTables.metadataVersion("uncommitted_change")); + assertThat(TestTables.readMetadata("uncommitted_change")).isNull(); + assertThat(TestTables.metadataVersion("uncommitted_change")).isNull(); txn.updateProperties().set("test-property", "test-value"); // not committed - Assertions.assertThatThrownBy(txn::commitTransaction) + assertThatThrownBy(txn::commitTransaction) .isInstanceOf(IllegalStateException.class) .hasMessage("Cannot commit transaction: last operation has not committed"); } - @Test + @TestTemplate public void testCreateTransactionConflict() throws IOException { - File tableDir = 
temp.newFolder(); - Assert.assertTrue(tableDir.delete()); + File tableDir = Files.createTempDirectory(temp, "junit").toFile(); + assertThat(tableDir.delete()).isTrue(); Transaction txn = TestTables.beginCreate(tableDir, "test_conflict", SCHEMA, SPEC); // append in the transaction to ensure a manifest file is created txn.newAppend().appendFile(FILE_A).commit(); - Assert.assertNull( - "Starting a create transaction should not commit metadata", - TestTables.readMetadata("test_conflict")); - Assert.assertNull( - "Should have no metadata version", TestTables.metadataVersion("test_conflict")); + assertThat(TestTables.readMetadata("test_conflict")).isNull(); + assertThat(TestTables.metadataVersion("test_conflict")).isNull(); Table conflict = TestTables.create(tableDir, "test_conflict", SCHEMA, unpartitioned(), formatVersion); - Assert.assertEquals( - "Table schema should match with reassigned IDs", - TypeUtil.assignIncreasingFreshIds(SCHEMA).asStruct(), - conflict.schema().asStruct()); - Assert.assertEquals( - "Table spec should match conflict table, not transaction table", - unpartitioned(), - conflict.spec()); - Assert.assertFalse( - "Table should not have any snapshots", conflict.snapshots().iterator().hasNext()); - - Assertions.assertThatThrownBy(txn::commitTransaction) + assertThat(conflict.schema().asStruct()) + .isEqualTo(TypeUtil.assignIncreasingFreshIds(SCHEMA).asStruct()); + assertThat(conflict.spec()).isEqualTo(unpartitioned()); + assertThat(conflict.snapshots()).isEmpty(); + + assertThatThrownBy(txn::commitTransaction) .isInstanceOf(CommitFailedException.class) .hasMessageStartingWith("Commit failed: table was updated"); - Assert.assertEquals( - "Should clean up metadata", - Sets.newHashSet(), - Sets.newHashSet(listManifestFiles(tableDir))); + assertThat(listManifestFiles(tableDir)).isEmpty(); } } diff --git a/core/src/test/java/org/apache/iceberg/TestPartitionSpecInfo.java b/core/src/test/java/org/apache/iceberg/TestPartitionSpecInfo.java index 
46f38b97f2a4..a82ac6636d4a 100644 --- a/core/src/test/java/org/apache/iceberg/TestPartitionSpecInfo.java +++ b/core/src/test/java/org/apache/iceberg/TestPartitionSpecInfo.java @@ -19,46 +19,45 @@ package org.apache.iceberg; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.entry; import java.io.File; import java.io.IOException; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.List; import org.apache.iceberg.types.Types; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; + +@ExtendWith(ParameterizedTestExtension.class) public class TestPartitionSpecInfo { - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; + private final Schema schema = new Schema( required(1, "id", Types.IntegerType.get()), required(2, "data", Types.StringType.get())); private File tableDir = null; - @Parameterized.Parameters(name = "formatVersion = {0}") - public static Object[] parameters() { - return new Object[] {1, 2}; + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(1, 2); } - private final int formatVersion; - - public TestPartitionSpecInfo(int formatVersion) { - this.formatVersion = formatVersion; - } + @Parameter private int formatVersion; - @Before + @BeforeEach public void setupTableDir() throws IOException { - 
this.tableDir = temp.newFolder(); + this.tableDir = Files.createTempDirectory(temp, "junit").toFile(); } - @After + @AfterEach public void cleanupTables() { TestTables.clearTables(); } @@ -68,7 +67,7 @@ public void testSpecIsUnpartitionedForVoidTranforms() { PartitionSpec spec = PartitionSpec.builderFor(schema).alwaysNull("id").alwaysNull("data").build(); - Assert.assertTrue(spec.isUnpartitioned()); + assertThat(spec.isUnpartitioned()).isTrue(); } @Test @@ -76,11 +75,12 @@ public void testSpecInfoUnpartitionedTable() { PartitionSpec spec = PartitionSpec.unpartitioned(); TestTables.TestTable table = TestTables.create(tableDir, "test", schema, spec, formatVersion); - Assert.assertTrue(spec.isUnpartitioned()); - Assert.assertEquals(spec, table.spec()); - Assert.assertEquals(spec.lastAssignedFieldId(), table.spec().lastAssignedFieldId()); - Assert.assertEquals(ImmutableMap.of(spec.specId(), spec), table.specs()); - Assert.assertNull(table.specs().get(Integer.MAX_VALUE)); + assertThat(spec.isUnpartitioned()).isTrue(); + assertThat(table.spec()).isEqualTo(spec); + assertThat(table.spec().lastAssignedFieldId()).isEqualTo(spec.lastAssignedFieldId()); + assertThat(table.specs()) + .containsExactly(entry(spec.specId(), spec)) + .doesNotContainKey(Integer.MAX_VALUE); } @Test @@ -88,10 +88,11 @@ public void testSpecInfoPartitionedTable() { PartitionSpec spec = PartitionSpec.builderFor(schema).identity("data").build(); TestTables.TestTable table = TestTables.create(tableDir, "test", schema, spec, formatVersion); - Assert.assertEquals(spec, table.spec()); - Assert.assertEquals(spec.lastAssignedFieldId(), table.spec().lastAssignedFieldId()); - Assert.assertEquals(ImmutableMap.of(spec.specId(), spec), table.specs()); - Assert.assertNull(table.specs().get(Integer.MAX_VALUE)); + assertThat(table.spec()).isEqualTo(spec); + assertThat(table.spec().lastAssignedFieldId()).isEqualTo(spec.lastAssignedFieldId()); + assertThat(table.specs()) + .containsExactly(entry(spec.specId(), spec)) + 
.doesNotContainKey(Integer.MAX_VALUE); } @Test @@ -99,7 +100,7 @@ public void testColumnDropWithPartitionSpecEvolution() { PartitionSpec spec = PartitionSpec.builderFor(schema).identity("id").build(); TestTables.TestTable table = TestTables.create(tableDir, "test", schema, spec, formatVersion); - Assert.assertEquals(spec, table.spec()); + assertThat(table.spec()).isEqualTo(spec); TableMetadata base = TestTables.readMetadata("test"); PartitionSpec newSpec = @@ -111,14 +112,11 @@ public void testColumnDropWithPartitionSpecEvolution() { final Schema expectedSchema = new Schema(required(2, "data", Types.StringType.get())); - Assert.assertEquals(newSpec, table.spec()); - Assert.assertEquals(newSpec, table.specs().get(newSpec.specId())); - Assert.assertEquals(spec, table.specs().get(spec.specId())); - Assert.assertEquals( - ImmutableMap.of(spec.specId(), spec, newSpec.specId(), newSpec), table.specs()); - Assert.assertNull(table.specs().get(Integer.MAX_VALUE)); - Assert.assertTrue( - "Schema must have only \"data\" column", table.schema().sameSchema(expectedSchema)); + assertThat(table.spec()).isEqualTo(newSpec); + assertThat(table.specs()) + .containsExactly(entry(spec.specId(), spec), entry(newSpec.specId(), newSpec)) + .doesNotContainKey(Integer.MAX_VALUE); + assertThat(table.schema().asStruct()).isEqualTo(expectedSchema.asStruct()); } @Test @@ -126,18 +124,16 @@ public void testSpecInfoPartitionSpecEvolutionForV1Table() { PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 4).build(); TestTables.TestTable table = TestTables.create(tableDir, "test", schema, spec, formatVersion); - Assert.assertEquals(spec, table.spec()); + assertThat(table.spec()).isEqualTo(spec); TableMetadata base = TestTables.readMetadata("test"); PartitionSpec newSpec = PartitionSpec.builderFor(table.schema()).bucket("data", 10).withSpecId(1).build(); table.ops().commit(base, base.updatePartitionSpec(newSpec)); - Assert.assertEquals(newSpec, table.spec()); - 
Assert.assertEquals(newSpec, table.specs().get(newSpec.specId())); - Assert.assertEquals(spec, table.specs().get(spec.specId())); - Assert.assertEquals( - ImmutableMap.of(spec.specId(), spec, newSpec.specId(), newSpec), table.specs()); - Assert.assertNull(table.specs().get(Integer.MAX_VALUE)); + assertThat(table.spec()).isEqualTo(newSpec); + assertThat(table.specs()) + .containsExactly(entry(spec.specId(), spec), entry(newSpec.specId(), newSpec)) + .doesNotContainKey(Integer.MAX_VALUE); } } diff --git a/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java b/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java index 1d88e97f9925..ad8861f53685 100644 --- a/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java +++ b/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java @@ -18,12 +18,18 @@ */ package org.apache.iceberg; -import org.junit.Assert; -import org.junit.Test; +import static org.assertj.core.api.Assertions.assertThat; -public class TestPartitionSpecParser extends TableTestBase { - public TestPartitionSpecParser() { - super(1); +import java.util.Arrays; +import java.util.List; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(ParameterizedTestExtension.class) +public class TestPartitionSpecParser extends TestBase { + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(1); } @Test @@ -38,7 +44,7 @@ public void testToJsonForV1Table() { + " \"field-id\" : 1000\n" + " } ]\n" + "}"; - Assert.assertEquals(expected, PartitionSpecParser.toJson(table.spec(), true)); + assertThat(PartitionSpecParser.toJson(table.spec(), true)).isEqualTo(expected); PartitionSpec spec = PartitionSpec.builderFor(table.schema()).bucket("id", 8).bucket("data", 16).build(); @@ -60,7 +66,7 @@ public void testToJsonForV1Table() { + " \"field-id\" : 1001\n" + " } ]\n" + "}"; - Assert.assertEquals(expected, 
PartitionSpecParser.toJson(table.spec(), true)); + assertThat(PartitionSpecParser.toJson(table.spec(), true)).isEqualTo(expected); } @Test @@ -83,10 +89,10 @@ public void testFromJsonWithFieldId() { PartitionSpec spec = PartitionSpecParser.fromJson(table.schema(), specString); - Assert.assertEquals(2, spec.fields().size()); + assertThat(spec.fields()).hasSize(2); // should be the field ids in the JSON - Assert.assertEquals(1001, spec.fields().get(0).fieldId()); - Assert.assertEquals(1000, spec.fields().get(1).fieldId()); + assertThat(spec.fields().get(0).fieldId()).isEqualTo(1001); + assertThat(spec.fields().get(1).fieldId()).isEqualTo(1000); } @Test @@ -107,17 +113,16 @@ public void testFromJsonWithoutFieldId() { PartitionSpec spec = PartitionSpecParser.fromJson(table.schema(), specString); - Assert.assertEquals(2, spec.fields().size()); + assertThat(spec.fields()).hasSize(2); // should be the default assignment - Assert.assertEquals(1000, spec.fields().get(0).fieldId()); - Assert.assertEquals(1001, spec.fields().get(1).fieldId()); + assertThat(spec.fields().get(0).fieldId()).isEqualTo(1000); + assertThat(spec.fields().get(1).fieldId()).isEqualTo(1001); } @Test public void testTransforms() { for (PartitionSpec spec : PartitionSpecTestBase.SPECS) { - Assert.assertEquals( - "To/from JSON should produce equal partition spec", spec, roundTripJSON(spec)); + assertThat(roundTripJSON(spec)).isEqualTo(spec); } } diff --git a/core/src/test/java/org/apache/iceberg/TestPartitioning.java b/core/src/test/java/org/apache/iceberg/TestPartitioning.java index 4de62e3cfee3..91f0fe95c2fa 100644 --- a/core/src/test/java/org/apache/iceberg/TestPartitioning.java +++ b/core/src/test/java/org/apache/iceberg/TestPartitioning.java @@ -19,21 +19,22 @@ package org.apache.iceberg; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.File; 
import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.NestedField; import org.apache.iceberg.types.Types.StructType; -import org.assertj.core.api.Assertions; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestPartitioning { @@ -51,15 +52,15 @@ public class TestPartitioning { private static final PartitionSpec BY_DATA_CATEGORY_BUCKET_SPEC = PartitionSpec.builderFor(SCHEMA).identity("data").bucket("category", 8).build(); - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; private File tableDir = null; - @Before + @BeforeEach public void setupTableDir() throws IOException { - this.tableDir = temp.newFolder(); + this.tableDir = Files.createTempDirectory(temp, "junit").toFile(); } - @After + @AfterEach public void cleanupTables() { TestTables.clearTables(); } @@ -71,19 +72,19 @@ public void testPartitionTypeWithSpecEvolutionInV1Tables() { table.updateSpec().addField(Expressions.bucket("category", 8)).commit(); - Assert.assertEquals("Should have 2 specs", 2, table.specs().size()); + assertThat(table.specs()).hasSize(2); StructType expectedType = StructType.of( NestedField.optional(1000, "data", Types.StringType.get()), NestedField.optional(1001, "category_bucket_8", Types.IntegerType.get())); StructType actualType = Partitioning.partitionType(table); - Assert.assertEquals("Types must match", expectedType, actualType); + assertThat(actualType).isEqualTo(expectedType); 
table.updateSpec().removeField("data").removeField("category_bucket_8").commit(); - Assert.assertEquals("Should have 3 specs", 3, table.specs().size()); - Assert.assertTrue("PartitionSpec should be unpartitioned", table.spec().isUnpartitioned()); + assertThat(table.specs()).hasSize(3); + assertThat(table.spec().isUnpartitioned()).isTrue(); } @Test @@ -93,14 +94,14 @@ public void testPartitionTypeWithSpecEvolutionInV2Tables() { table.updateSpec().removeField("data").addField("category").commit(); - Assert.assertEquals("Should have 2 specs", 2, table.specs().size()); + assertThat(table.specs()).hasSize(2); StructType expectedType = StructType.of( NestedField.optional(1000, "data", Types.StringType.get()), NestedField.optional(1001, "category", Types.StringType.get())); StructType actualType = Partitioning.partitionType(table); - Assert.assertEquals("Types must match", expectedType, actualType); + assertThat(actualType).isEqualTo(expectedType); } @Test @@ -118,7 +119,7 @@ public void testPartitionTypeWithRenamesInV1Table() { NestedField.optional(1000, "p2", Types.StringType.get()), NestedField.optional(1001, "category", Types.StringType.get())); StructType actualType = Partitioning.partitionType(table); - Assert.assertEquals("Types must match", expectedType, actualType); + assertThat(actualType).isEqualTo(expectedType); } @Test @@ -136,7 +137,7 @@ public void testPartitionTypeWithAddingBackSamePartitionFieldInV1Table() { NestedField.optional(1000, "data_1000", Types.StringType.get()), NestedField.optional(1001, "data", Types.StringType.get())); StructType actualType = Partitioning.partitionType(table); - Assert.assertEquals("Types must match", expectedType, actualType); + assertThat(actualType).isEqualTo(expectedType); } @Test @@ -152,7 +153,7 @@ public void testPartitionTypeWithAddingBackSamePartitionFieldInV2Table() { StructType expectedType = StructType.of(NestedField.optional(1000, "data", Types.StringType.get())); StructType actualType = 
Partitioning.partitionType(table); - Assert.assertEquals("Types must match", expectedType, actualType); + assertThat(actualType).isEqualTo(expectedType); } @Test @@ -166,9 +167,9 @@ public void testPartitionTypeWithIncompatibleSpecEvolution() { TableMetadata current = ops.current(); ops.commit(current, current.updatePartitionSpec(newSpec)); - Assert.assertEquals("Should have 2 specs", 2, table.specs().size()); + assertThat(table.specs()).hasSize(2); - Assertions.assertThatThrownBy(() -> Partitioning.partitionType(table)) + assertThatThrownBy(() -> Partitioning.partitionType(table)) .isInstanceOf(ValidationException.class) .hasMessageStartingWith("Conflicting partition fields"); } @@ -180,12 +181,12 @@ public void testGroupingKeyTypeWithSpecEvolutionInV1Tables() { table.updateSpec().addField(Expressions.bucket("category", 8)).commit(); - Assert.assertEquals("Should have 2 specs", 2, table.specs().size()); + assertThat(table.specs()).hasSize(2); StructType expectedType = StructType.of(NestedField.optional(1000, "data", Types.StringType.get())); StructType actualType = Partitioning.groupingKeyType(table.schema(), table.specs().values()); - Assert.assertEquals("Types must match", expectedType, actualType); + assertThat(actualType).isEqualTo(expectedType); } @Test @@ -195,12 +196,12 @@ public void testGroupingKeyTypeWithSpecEvolutionInV2Tables() { table.updateSpec().addField(Expressions.bucket("category", 8)).commit(); - Assert.assertEquals("Should have 2 specs", 2, table.specs().size()); + assertThat(table.specs()).hasSize(2); StructType expectedType = StructType.of(NestedField.optional(1000, "data", Types.StringType.get())); StructType actualType = Partitioning.groupingKeyType(table.schema(), table.specs().values()); - Assert.assertEquals("Types must match", expectedType, actualType); + assertThat(actualType).isEqualTo(expectedType); } @Test @@ -211,12 +212,12 @@ public void testGroupingKeyTypeWithDroppedPartitionFieldInV1Tables() { 
table.updateSpec().removeField(Expressions.bucket("category", 8)).commit(); - Assert.assertEquals("Should have 2 specs", 2, table.specs().size()); + assertThat(table.specs()).hasSize(2); StructType expectedType = StructType.of(NestedField.optional(1000, "data", Types.StringType.get())); StructType actualType = Partitioning.groupingKeyType(table.schema(), table.specs().values()); - Assert.assertEquals("Types must match", expectedType, actualType); + assertThat(actualType).isEqualTo(expectedType); } @Test @@ -227,12 +228,12 @@ public void testGroupingKeyTypeWithDroppedPartitionFieldInV2Tables() { table.updateSpec().removeField(Expressions.bucket("category", 8)).commit(); - Assert.assertEquals("Should have 2 specs", 2, table.specs().size()); + assertThat(table.specs()).hasSize(2); StructType expectedType = StructType.of(NestedField.optional(1000, "data", Types.StringType.get())); StructType actualType = Partitioning.groupingKeyType(table.schema(), table.specs().values()); - Assert.assertEquals("Types must match", expectedType, actualType); + assertThat(actualType).isEqualTo(expectedType); } @Test @@ -248,7 +249,7 @@ public void testGroupingKeyTypeWithRenamesInV1Table() { StructType expectedType = StructType.of(NestedField.optional(1000, "p2", Types.StringType.get())); StructType actualType = Partitioning.groupingKeyType(table.schema(), table.specs().values()); - Assert.assertEquals("Types must match", expectedType, actualType); + assertThat(actualType).isEqualTo(expectedType); } @Test @@ -264,7 +265,7 @@ public void testGroupingKeyTypeWithRenamesInV2Table() { StructType expectedType = StructType.of(NestedField.optional(1000, "p2", Types.StringType.get())); StructType actualType = Partitioning.groupingKeyType(table.schema(), table.specs().values()); - Assert.assertEquals("Types must match", expectedType, actualType); + assertThat(actualType).isEqualTo(expectedType); } @Test @@ -274,11 +275,11 @@ public void testGroupingKeyTypeWithEvolvedIntoUnpartitionedSpecV1Table() { 
table.updateSpec().removeField("data").commit(); - Assert.assertEquals("Should have 2 specs", 2, table.specs().size()); + assertThat(table.specs()).hasSize(2); StructType expectedType = StructType.of(); StructType actualType = Partitioning.groupingKeyType(table.schema(), table.specs().values()); - Assert.assertEquals("Types must match", expectedType, actualType); + assertThat(actualType).isEqualTo(expectedType); } @Test @@ -288,11 +289,11 @@ public void testGroupingKeyTypeWithEvolvedIntoUnpartitionedSpecV2Table() { table.updateSpec().removeField("data").commit(); - Assert.assertEquals("Should have 2 specs", 2, table.specs().size()); + assertThat(table.specs()).hasSize(2); StructType expectedType = StructType.of(); StructType actualType = Partitioning.groupingKeyType(table.schema(), table.specs().values()); - Assert.assertEquals("Types must match", expectedType, actualType); + assertThat(actualType).isEqualTo(expectedType); } @Test @@ -307,7 +308,7 @@ public void testGroupingKeyTypeWithAddingBackSamePartitionFieldInV1Table() { StructType expectedType = StructType.of(NestedField.optional(1000, "category", Types.StringType.get())); StructType actualType = Partitioning.groupingKeyType(table.schema(), table.specs().values()); - Assert.assertEquals("Types must match", expectedType, actualType); + assertThat(actualType).isEqualTo(expectedType); } @Test @@ -322,7 +323,7 @@ public void testGroupingKeyTypeWithAddingBackSamePartitionFieldInV2Table() { StructType expectedType = StructType.of(NestedField.optional(1000, "category", Types.StringType.get())); StructType actualType = Partitioning.groupingKeyType(table.schema(), table.specs().values()); - Assert.assertEquals("Types must match", expectedType, actualType); + assertThat(actualType).isEqualTo(expectedType); } @Test @@ -331,11 +332,11 @@ public void testGroupingKeyTypeWithOnlyUnpartitionedSpec() { TestTables.create( tableDir, "test", SCHEMA, PartitionSpec.unpartitioned(), V1_FORMAT_VERSION); - Assert.assertEquals("Should 
have 1 spec", 1, table.specs().size()); + assertThat(table.specs()).hasSize(1); StructType expectedType = StructType.of(); StructType actualType = Partitioning.groupingKeyType(table.schema(), table.specs().values()); - Assert.assertEquals("Types must match", expectedType, actualType); + assertThat(actualType).isEqualTo(expectedType); } @Test @@ -346,11 +347,11 @@ public void testGroupingKeyTypeWithEvolvedUnpartitionedSpec() { table.updateSpec().addField(Expressions.bucket("category", 8)).commit(); - Assert.assertEquals("Should have 2 specs", 2, table.specs().size()); + assertThat(table.specs()).hasSize(2); StructType expectedType = StructType.of(); StructType actualType = Partitioning.groupingKeyType(table.schema(), table.specs().values()); - Assert.assertEquals("Types must match", expectedType, actualType); + assertThat(actualType).isEqualTo(expectedType); } @Test @@ -363,7 +364,7 @@ public void testGroupingKeyTypeWithProjectedSchema() { StructType expectedType = StructType.of(NestedField.optional(1001, "data", Types.StringType.get())); StructType actualType = Partitioning.groupingKeyType(projectedSchema, table.specs().values()); - Assert.assertEquals("Types must match", expectedType, actualType); + assertThat(actualType).isEqualTo(expectedType); } @Test @@ -377,10 +378,9 @@ public void testGroupingKeyTypeWithIncompatibleSpecEvolution() { TableMetadata current = ops.current(); ops.commit(current, current.updatePartitionSpec(newSpec)); - Assert.assertEquals("Should have 2 specs", 2, table.specs().size()); + assertThat(table.specs()).hasSize(2); - Assertions.assertThatThrownBy( - () -> Partitioning.groupingKeyType(table.schema(), table.specs().values())) + assertThatThrownBy(() -> Partitioning.groupingKeyType(table.schema(), table.specs().values())) .isInstanceOf(ValidationException.class) .hasMessageStartingWith("Conflicting partition fields"); } @@ -403,6 +403,6 @@ public void testDeletingPartitionField() { .identity("id") .build(); - Assert.assertEquals("The spec 
should be there", spec, table.spec()); + assertThat(table.spec()).isEqualTo(spec); } } diff --git a/core/src/test/java/org/apache/iceberg/TestReplaceTransaction.java b/core/src/test/java/org/apache/iceberg/TestReplaceTransaction.java index b338d00696dd..3a6d2017eb82 100644 --- a/core/src/test/java/org/apache/iceberg/TestReplaceTransaction.java +++ b/core/src/test/java/org/apache/iceberg/TestReplaceTransaction.java @@ -22,9 +22,15 @@ import static org.apache.iceberg.PartitionSpec.unpartitioned; import static org.apache.iceberg.SortDirection.ASC; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.assertj.core.api.Assumptions.assumeThat; import java.io.File; import java.io.IOException; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; @@ -37,32 +43,24 @@ import org.apache.iceberg.transforms.Transforms; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class TestReplaceTransaction extends TableTestBase { - @Parameterized.Parameters(name = "formatVersion = {0}") - public static Object[] parameters() { - return new Object[] {1, 2}; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(ParameterizedTestExtension.class) +public class TestReplaceTransaction extends TestBase { + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(1, 2); } - public TestReplaceTransaction(int formatVersion) { - super(formatVersion); - } - - @Test + 
@TestTemplate public void testReplaceTransactionWithCustomSortOrder() { Snapshot start = table.currentSnapshot(); Schema schema = table.schema(); table.newAppend().appendFile(FILE_A).commit(); - Assert.assertEquals("Version should be 1", 1L, (long) version()); + assertThat(version()).isEqualTo(1); validateSnapshot(start, table.currentSnapshot(), FILE_A); @@ -75,10 +73,9 @@ public void testReplaceTransactionWithCustomSortOrder() { table.refresh(); - Assert.assertEquals("Version should be 2", 2L, (long) version()); - Assert.assertNull("Table should not have a current snapshot", table.currentSnapshot()); - Assert.assertEquals( - "Schema should match previous schema", schema.asStruct(), table.schema().asStruct()); + assertThat(version()).isEqualTo(2); + assertThat(table.currentSnapshot()).isNull(); + assertThat(table.schema().asStruct()).isEqualTo(schema.asStruct()); PartitionSpec v2Expected = PartitionSpec.builderFor(table.schema()).withSpecId(1).build(); V2Assert.assertEquals("Table should have an unpartitioned spec", v2Expected, table.spec()); @@ -90,18 +87,17 @@ public void testReplaceTransactionWithCustomSortOrder() { .build(); V1Assert.assertEquals("Table should have a spec with one void field", v1Expected, table.spec()); - Assert.assertEquals("Table should have 2 orders", 2, table.sortOrders().size()); + assertThat(table.sortOrders()).hasSize(2); SortOrder sortOrder = table.sortOrder(); - Assert.assertEquals("Order ID must match", 1, sortOrder.orderId()); - Assert.assertEquals("Order must have 1 field", 1, sortOrder.fields().size()); - Assert.assertEquals("Direction must match ", ASC, sortOrder.fields().get(0).direction()); - Assert.assertEquals( - "Null order must match ", NULLS_FIRST, sortOrder.fields().get(0).nullOrder()); + assertThat(sortOrder.orderId()).isEqualTo(1); + assertThat(sortOrder.fields()).hasSize(1); + assertThat(sortOrder.fields().get(0).direction()).isEqualTo(ASC); + assertThat(sortOrder.fields().get(0).nullOrder()).isEqualTo(NULLS_FIRST); 
Transform transform = Transforms.identity(); - Assert.assertEquals("Transform must match", transform, sortOrder.fields().get(0).transform()); + assertThat(sortOrder.fields().get(0).transform()).isEqualTo(transform); } - @Test + @TestTemplate public void testReplaceTransaction() { Schema newSchema = new Schema( @@ -113,7 +109,7 @@ public void testReplaceTransaction() { table.newAppend().appendFile(FILE_A).commit(); - Assert.assertEquals("Version should be 1", 1L, (long) version()); + assertThat(version()).isEqualTo(1); validateSnapshot(start, table.currentSnapshot(), FILE_A); @@ -122,10 +118,9 @@ public void testReplaceTransaction() { table.refresh(); - Assert.assertEquals("Version should be 2", 2L, (long) version()); - Assert.assertNull("Table should not have a current snapshot", table.currentSnapshot()); - Assert.assertEquals( - "Schema should match previous schema", schema.asStruct(), table.schema().asStruct()); + assertThat(version()).isEqualTo(2); + assertThat(table.currentSnapshot()).isNull(); + assertThat(table.schema().asStruct()).isEqualTo(schema.asStruct()); PartitionSpec v2Expected = PartitionSpec.builderFor(table.schema()).withSpecId(1).build(); V2Assert.assertEquals("Table should have an unpartitioned spec", v2Expected, table.spec()); @@ -137,15 +132,16 @@ public void testReplaceTransaction() { .build(); V1Assert.assertEquals("Table should have a spec with one void field", v1Expected, table.spec()); - Assert.assertEquals("Table should have 1 order", 1, table.sortOrders().size()); - Assert.assertEquals("Table order ID should match", 0, table.sortOrder().orderId()); - Assert.assertTrue("Table should be unsorted", table.sortOrder().isUnsorted()); + assertThat(table.sortOrders()).hasSize(1); + assertThat(table.sortOrder().orderId()).isEqualTo(0); + assertThat(table.sortOrder().isUnsorted()).isTrue(); } - @Test + @TestTemplate public void testReplaceWithIncompatibleSchemaUpdate() { - Assume.assumeTrue( - "Fails early for v1 tables because partition spec 
cannot drop a field", formatVersion == 2); + assumeThat(formatVersion) + .as("Fails early for v1 tables because partition spec cannot drop a field") + .isEqualTo(2); Schema newSchema = new Schema(required(4, "obj_id", Types.IntegerType.get())); @@ -153,7 +149,7 @@ public void testReplaceWithIncompatibleSchemaUpdate() { table.newAppend().appendFile(FILE_A).commit(); - Assert.assertEquals("Version should be 1", 1L, (long) version()); + assertThat(version()).isEqualTo(1); validateSnapshot(start, table.currentSnapshot(), FILE_A); @@ -162,15 +158,13 @@ public void testReplaceWithIncompatibleSchemaUpdate() { table.refresh(); - Assert.assertEquals("Version should be 2", 2L, (long) version()); - Assert.assertNull("Table should not have a current snapshot", table.currentSnapshot()); - Assert.assertEquals( - "Schema should use new schema, not compatible with previous", - new Schema(required(3, "obj_id", Types.IntegerType.get())).asStruct(), - table.schema().asStruct()); + assertThat(version()).isEqualTo(2); + assertThat(table.currentSnapshot()).isNull(); + assertThat(table.schema().asStruct()) + .isEqualTo(new Schema(required(3, "obj_id", Types.IntegerType.get())).asStruct()); } - @Test + @TestTemplate public void testReplaceWithNewPartitionSpec() { PartitionSpec newSpec = PartitionSpec.unpartitioned(); @@ -179,7 +173,7 @@ public void testReplaceWithNewPartitionSpec() { table.newAppend().appendFile(FILE_A).commit(); - Assert.assertEquals("Version should be 1", 1L, (long) version()); + assertThat(version()).isEqualTo(1); validateSnapshot(start, table.currentSnapshot(), FILE_A); @@ -188,12 +182,9 @@ public void testReplaceWithNewPartitionSpec() { table.refresh(); - Assert.assertEquals("Version should be 2", 2L, (long) version()); - Assert.assertNull("Table should not have a current snapshot", table.currentSnapshot()); - Assert.assertEquals( - "Schema should use new schema, not compatible with previous", - schema.asStruct(), - table.schema().asStruct()); + 
assertThat(version()).isEqualTo(2); + assertThat(table.currentSnapshot()).isNull(); + assertThat(table.schema().asStruct()).isEqualTo(schema.asStruct()); PartitionSpec v2Expected = PartitionSpec.builderFor(table.schema()).withSpecId(1).build(); V2Assert.assertEquals("Table should have an unpartitioned spec", v2Expected, table.spec()); @@ -206,14 +197,14 @@ public void testReplaceWithNewPartitionSpec() { V1Assert.assertEquals("Table should have a spec with one void field", v1Expected, table.spec()); } - @Test + @TestTemplate public void testReplaceWithNewData() { Snapshot start = table.currentSnapshot(); Schema schema = table.schema(); table.newAppend().appendFile(FILE_A).commit(); - Assert.assertEquals("Version should be 1", 1L, (long) version()); + assertThat(version()).isEqualTo(1); validateSnapshot(start, table.currentSnapshot(), FILE_A); @@ -225,19 +216,16 @@ public void testReplaceWithNewData() { table.refresh(); - Assert.assertEquals("Version should be 2", 2L, (long) version()); - Assert.assertNotNull("Table should have a current snapshot", table.currentSnapshot()); - Assert.assertEquals( - "Schema should use new schema, not compatible with previous", - schema.asStruct(), - table.schema().asStruct()); + assertThat(version()).isEqualTo(2); + assertThat(table.currentSnapshot()).isNotNull(); + assertThat(table.schema().asStruct()).isEqualTo(schema.asStruct()); validateSnapshot(null, table.currentSnapshot(), FILE_B, FILE_C, FILE_D); } - @Test + @TestTemplate public void testReplaceDetectsUncommittedChangeOnCommit() { - Assert.assertEquals("Version should be 0", 0L, (long) version()); + assertThat(version()).isEqualTo(0); Transaction replace = TestTables.beginReplace(tableDir, "test", table.schema(), table.spec()); @@ -247,16 +235,16 @@ public void testReplaceDetectsUncommittedChangeOnCommit() { .appendFile(FILE_C) .appendFile(FILE_D); - Assertions.assertThatThrownBy(replace::commitTransaction) + assertThatThrownBy(replace::commitTransaction) 
.isInstanceOf(IllegalStateException.class) .hasMessage("Cannot commit transaction: last operation has not committed"); - Assert.assertEquals("Version should be 0", 0L, (long) version()); + assertThat(version()).isEqualTo(0); } - @Test + @TestTemplate public void testReplaceDetectsUncommittedChangeOnTableCommit() { - Assert.assertEquals("Version should be 0", 0L, (long) version()); + assertThat(version()).isEqualTo(0); Transaction replace = TestTables.beginReplace(tableDir, "test", table.schema(), table.spec()); @@ -267,21 +255,21 @@ public void testReplaceDetectsUncommittedChangeOnTableCommit() { .appendFile(FILE_C) .appendFile(FILE_D); - Assertions.assertThatThrownBy(replace::commitTransaction) + assertThatThrownBy(replace::commitTransaction) .isInstanceOf(IllegalStateException.class) .hasMessage("Cannot commit transaction: last operation has not committed"); - Assert.assertEquals("Version should be 0", 0L, (long) version()); + assertThat(version()).isEqualTo(0); } - @Test + @TestTemplate public void testReplaceTransactionRetry() { Snapshot start = table.currentSnapshot(); Schema schema = table.schema(); table.newAppend().appendFile(FILE_A).commit(); - Assert.assertEquals("Version should be 1", 1L, (long) version()); + assertThat(version()).isEqualTo(1); validateSnapshot(start, table.currentSnapshot(), FILE_A); @@ -296,23 +284,20 @@ public void testReplaceTransactionRetry() { table.refresh(); - Assert.assertEquals("Version should be 2", 2L, (long) version()); - Assert.assertNotNull("Table should have a current snapshot", table.currentSnapshot()); - Assert.assertEquals( - "Schema should use new schema, not compatible with previous", - schema.asStruct(), - table.schema().asStruct()); + assertThat(version()).isEqualTo(2); + assertThat(table.currentSnapshot()).isNotNull(); + assertThat(table.schema().asStruct()).isEqualTo(schema.asStruct()); validateSnapshot(null, table.currentSnapshot(), FILE_B, FILE_C, FILE_D); } - @Test + @TestTemplate public void 
testReplaceTransactionConflict() { Snapshot start = table.currentSnapshot(); table.newAppend().appendFile(FILE_A).commit(); - Assert.assertEquals("Version should be 1", 1L, (long) version()); + assertThat(version()).isEqualTo(1); validateSnapshot(start, table.currentSnapshot(), FILE_A); Set manifests = Sets.newHashSet(listManifestFiles()); @@ -324,63 +309,52 @@ public void testReplaceTransactionConflict() { // keep failing to trigger eventual transaction failure ((TestTables.TestTableOperations) ((BaseTransaction) replace).ops()).failCommits(100); - Assertions.assertThatThrownBy(replace::commitTransaction) + assertThatThrownBy(replace::commitTransaction) .isInstanceOf(CommitFailedException.class) .hasMessage("Injected failure"); - Assert.assertEquals("Version should be 1", 1L, (long) version()); + assertThat(version()).isEqualTo(1); table.refresh(); validateSnapshot(start, table.currentSnapshot(), FILE_A); - Assert.assertEquals( - "Should clean up replace manifests", manifests, Sets.newHashSet(listManifestFiles())); + assertThat(listManifestFiles()).containsExactlyElementsOf(manifests); } - @Test + @TestTemplate public void testReplaceToCreateAndAppend() throws IOException { - File tableDir = temp.newFolder(); - Assert.assertTrue(tableDir.delete()); + File tableDir = Files.createTempDirectory(temp, "junit").toFile(); + assertThat(tableDir.delete()).isTrue(); // this table doesn't exist. 
Transaction replace = TestTables.beginReplace(tableDir, "test_append", SCHEMA, unpartitioned()); - Assert.assertNull( - "Starting a create transaction should not commit metadata", - TestTables.readMetadata("test_append")); - Assert.assertNull("Should have no metadata version", TestTables.metadataVersion("test_append")); + assertThat(TestTables.readMetadata("test_append")).isNull(); + assertThat(TestTables.metadataVersion("test_append")).isNull(); - Assert.assertTrue( - "Should return a transaction table", - replace.table() instanceof BaseTransaction.TransactionTable); + assertThat(replace.table()).isInstanceOf(BaseTransaction.TransactionTable.class); replace.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); - Assert.assertNull( - "Appending in a transaction should not commit metadata", - TestTables.readMetadata("test_append")); - Assert.assertNull("Should have no metadata version", TestTables.metadataVersion("test_append")); + assertThat(TestTables.readMetadata("test_append")).isNull(); + assertThat(TestTables.metadataVersion("test_append")).isNull(); replace.commitTransaction(); TableMetadata meta = TestTables.readMetadata("test_append"); - Assert.assertNotNull("Table metadata should be created after transaction commits", meta); - Assert.assertEquals( - "Should have metadata version 0", 0, (int) TestTables.metadataVersion("test_append")); - Assert.assertEquals("Should have 1 manifest file", 1, listManifestFiles(tableDir).size()); - - Assert.assertEquals( - "Table schema should match with reassigned IDs", - assignFreshIds(SCHEMA).asStruct(), - meta.schema().asStruct()); - Assert.assertEquals("Table spec should match", unpartitioned(), meta.spec()); - Assert.assertEquals("Table should have one snapshot", 1, meta.snapshots().size()); + assertThat(meta).isNotNull(); + assertThat(TestTables.metadataVersion("test_append")).isEqualTo(0); + assertThat(listManifestFiles(tableDir)).hasSize(1); + + 
assertThat(meta.schema().asStruct()).isEqualTo(assignFreshIds(SCHEMA).asStruct()); + assertThat(meta.spec()).isEqualTo(unpartitioned()); + assertThat(meta.snapshots()).hasSize(1); validateSnapshot(null, meta.currentSnapshot(), FILE_A, FILE_B); } - @Test + @TestTemplate public void testReplaceTransactionWithUnknownState() { Schema newSchema = new Schema( @@ -392,7 +366,7 @@ public void testReplaceTransactionWithUnknownState() { table.newAppend().appendFile(FILE_A).commit(); - Assert.assertEquals("Version should be 1", 1L, (long) version()); + assertThat(version()).isEqualTo(1L); validateSnapshot(start, table.currentSnapshot(), FILE_A); TestTables.TestTableOperations ops = @@ -409,26 +383,23 @@ public void testReplaceTransactionWithUnknownState() { replace.newAppend().appendFile(FILE_B).commit(); - Assertions.assertThatThrownBy(replace::commitTransaction) + assertThatThrownBy(replace::commitTransaction) .isInstanceOf(CommitStateUnknownException.class) .hasMessageStartingWith("datacenter on fire"); table.refresh(); - Assert.assertEquals("Version should be 2", 2L, (long) version()); - Assert.assertNotNull("Table should have a current snapshot", table.currentSnapshot()); - Assert.assertEquals( - "Schema should use new schema, not compatible with previous", - schema.asStruct(), - table.schema().asStruct()); - Assert.assertEquals("Should have 4 files in metadata", 4, countAllMetadataFiles(tableDir)); + assertThat(version()).isEqualTo(2L); + assertThat(table.currentSnapshot()).isNotNull(); + assertThat(table.schema().asStruct()).isEqualTo(schema.asStruct()); + assertThat(countAllMetadataFiles(tableDir)).isEqualTo(4); validateSnapshot(null, table.currentSnapshot(), FILE_B); } - @Test + @TestTemplate public void testCreateTransactionWithUnknownState() throws IOException { - File tableDir = temp.newFolder(); - Assert.assertTrue(tableDir.delete()); + File tableDir = Files.createTempDirectory(temp, "junit").toFile(); + assertThat(tableDir.delete()).isTrue(); // this table 
doesn't exist. TestTables.TestTableOperations ops = @@ -443,38 +414,28 @@ public void testCreateTransactionWithUnknownState() throws IOException { ImmutableMap.of(), ops); - Assert.assertNull( - "Starting a create transaction should not commit metadata", - TestTables.readMetadata("test_append")); - Assert.assertNull("Should have no metadata version", TestTables.metadataVersion("test_append")); + assertThat(TestTables.readMetadata("test_append")).isNull(); + assertThat(TestTables.metadataVersion("test_append")).isNull(); - Assert.assertTrue( - "Should return a transaction table", - replace.table() instanceof BaseTransaction.TransactionTable); + assertThat(replace.table()).isInstanceOf(BaseTransaction.TransactionTable.class); replace.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); - Assert.assertNull( - "Appending in a transaction should not commit metadata", - TestTables.readMetadata("test_append")); - Assert.assertNull("Should have no metadata version", TestTables.metadataVersion("test_append")); + assertThat(TestTables.readMetadata("test_append")).isNull(); + assertThat(TestTables.metadataVersion("test_append")).isNull(); - Assertions.assertThatThrownBy(replace::commitTransaction) + assertThatThrownBy(replace::commitTransaction) .isInstanceOf(CommitStateUnknownException.class) .hasMessageStartingWith("datacenter on fire"); TableMetadata meta = TestTables.readMetadata("test_append"); - Assert.assertNotNull("Table metadata should be created after transaction commits", meta); - Assert.assertEquals( - "Should have metadata version 0", 0, (int) TestTables.metadataVersion("test_append")); - Assert.assertEquals("Should have 1 manifest file", 1, listManifestFiles(tableDir).size()); - Assert.assertEquals("Should have 2 files in metadata", 2, countAllMetadataFiles(tableDir)); - Assert.assertEquals( - "Table schema should match with reassigned IDs", - assignFreshIds(SCHEMA).asStruct(), - meta.schema().asStruct()); - Assert.assertEquals("Table spec should match", 
unpartitioned(), meta.spec()); - Assert.assertEquals("Table should have one snapshot", 1, meta.snapshots().size()); + assertThat(meta).isNotNull(); + assertThat(TestTables.metadataVersion("test_append")).isEqualTo(0); + assertThat(listManifestFiles(tableDir)).hasSize(1); + assertThat(countAllMetadataFiles(tableDir)).isEqualTo(2); + assertThat(meta.schema().asStruct()).isEqualTo(assignFreshIds(SCHEMA).asStruct()); + assertThat(meta.spec()).isEqualTo(unpartitioned()); + assertThat(meta.snapshots()).hasSize(1); validateSnapshot(null, meta.currentSnapshot(), FILE_A, FILE_B); } diff --git a/core/src/test/java/org/apache/iceberg/TestSetPartitionStatistics.java b/core/src/test/java/org/apache/iceberg/TestSetPartitionStatistics.java index 2ab5a141133f..3ebe047e90b7 100644 --- a/core/src/test/java/org/apache/iceberg/TestSetPartitionStatistics.java +++ b/core/src/test/java/org/apache/iceberg/TestSetPartitionStatistics.java @@ -18,36 +18,32 @@ */ package org.apache.iceberg; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class TestSetPartitionStatistics extends TableTestBase { - @Parameterized.Parameters(name = "formatVersion = {0}") - public static Object[] parameters() { - return new Object[] {1, 2}; +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.Arrays; +import java.util.List; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(ParameterizedTestExtension.class) +public class TestSetPartitionStatistics extends TestBase { + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(1, 2); } - public TestSetPartitionStatistics(int formatVersion) { - super(formatVersion); - } - - @Test + @TestTemplate public void testEmptyUpdateStatistics() { 
assertTableMetadataVersion(0); TableMetadata base = readMetadata(); table.updatePartitionStatistics().commit(); - Assert.assertSame( - "Base metadata should not change when commit is created", base, table.ops().current()); + assertThat(table.ops().current()).isSameAs(base); assertTableMetadataVersion(1); } - @Test + @TestTemplate public void testEmptyTransactionalUpdateStatistics() { assertTableMetadataVersion(0); TableMetadata base = readMetadata(); @@ -56,12 +52,11 @@ public void testEmptyTransactionalUpdateStatistics() { transaction.updatePartitionStatistics().commit(); transaction.commitTransaction(); - Assert.assertSame( - "Base metadata should not change when commit is created", base, table.ops().current()); + assertThat(table.ops().current()).isSameAs(base); assertTableMetadataVersion(0); } - @Test + @TestTemplate public void testUpdateStatistics() { // Create a snapshot table.newFastAppend().commit(); @@ -80,17 +75,11 @@ public void testUpdateStatistics() { TableMetadata metadata = readMetadata(); assertTableMetadataVersion(2); - Assert.assertEquals( - "Table snapshot should be the same after setting partition statistics file", - snapshotId, - metadata.currentSnapshot().snapshotId()); - Assert.assertEquals( - "Table metadata should have partition statistics files", - ImmutableList.of(partitionStatisticsFile), - metadata.partitionStatisticsFiles()); + assertThat(metadata.currentSnapshot().snapshotId()).isEqualTo(snapshotId); + assertThat(metadata.partitionStatisticsFiles()).containsExactly(partitionStatisticsFile); } - @Test + @TestTemplate public void testRemoveStatistics() { // Create a snapshot table.newFastAppend().commit(); @@ -109,23 +98,16 @@ public void testRemoveStatistics() { TableMetadata metadata = readMetadata(); assertTableMetadataVersion(2); - Assert.assertEquals( - "Table metadata should have partition statistics files", - ImmutableList.of(partitionStatisticsFile), - metadata.partitionStatisticsFiles()); + 
assertThat(metadata.partitionStatisticsFiles()).containsExactly(partitionStatisticsFile); table.updatePartitionStatistics().removePartitionStatistics(snapshotId).commit(); metadata = readMetadata(); assertTableMetadataVersion(3); - Assert.assertEquals( - "Table metadata should have no partition statistics files", - ImmutableList.of(), - metadata.partitionStatisticsFiles()); + assertThat(metadata.partitionStatisticsFiles()).isEmpty(); } private void assertTableMetadataVersion(int expected) { - Assert.assertEquals( - String.format("Table should be on version %s", expected), expected, (int) version()); + assertThat(version()).isEqualTo(expected); } } diff --git a/core/src/test/java/org/apache/iceberg/TestSetStatistics.java b/core/src/test/java/org/apache/iceberg/TestSetStatistics.java index f594e08d1e91..41941e3c6630 100644 --- a/core/src/test/java/org/apache/iceberg/TestSetStatistics.java +++ b/core/src/test/java/org/apache/iceberg/TestSetStatistics.java @@ -18,55 +18,51 @@ */ package org.apache.iceberg; +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.Arrays; +import java.util.List; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class TestSetStatistics extends TableTestBase { - @Parameterized.Parameters(name = "formatVersion = {0}") - public static Object[] parameters() { - return new Object[] {1, 2}; - } - - public TestSetStatistics(int formatVersion) { - super(formatVersion); +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(ParameterizedTestExtension.class) +public class TestSetStatistics extends TestBase { + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return 
Arrays.asList(1, 2); } - @Test + @TestTemplate public void testEmptyUpdateStatistics() { - Assert.assertEquals("Table should be on version 0", 0, (int) version()); + assertThat(version()).isEqualTo(0); TableMetadata base = readMetadata(); table.updateStatistics().commit(); - Assert.assertSame( - "Base metadata should not change when commit is created", base, table.ops().current()); - Assert.assertEquals("Table should be on version 1", 1, (int) version()); + assertThat(table.ops().current()).isSameAs(base); + assertThat(version()).isEqualTo(1); } - @Test + @TestTemplate public void testEmptyTransactionalUpdateStatistics() { - Assert.assertEquals("Table should be on version 0", 0, (int) version()); + assertThat(version()).isEqualTo(0); TableMetadata base = readMetadata(); Transaction transaction = table.newTransaction(); transaction.updateStatistics().commit(); transaction.commitTransaction(); - Assert.assertSame( - "Base metadata should not change when commit is created", base, table.ops().current()); - Assert.assertEquals("Table should be on version 0", 0, (int) version()); + assertThat(table.ops().current()).isSameAs(base); + assertThat(version()).isEqualTo(0); } - @Test + @TestTemplate public void testUpdateStatistics() { // Create a snapshot table.newFastAppend().commit(); - Assert.assertEquals("Table should be on version 1", 1, (int) version()); + assertThat(version()).isEqualTo(1); TableMetadata base = readMetadata(); long snapshotId = base.currentSnapshot().snapshotId(); @@ -87,22 +83,16 @@ public void testUpdateStatistics() { table.updateStatistics().setStatistics(snapshotId, statisticsFile).commit(); TableMetadata metadata = readMetadata(); - Assert.assertEquals("Table should be on version 2", 2, (int) version()); - Assert.assertEquals( - "Table snapshot should be the same after setting statistics file", - snapshotId, - metadata.currentSnapshot().snapshotId()); - Assert.assertEquals( - "Table metadata should have statistics files", - 
ImmutableList.of(statisticsFile), - metadata.statisticsFiles()); + assertThat(version()).isEqualTo(2); + assertThat(metadata.currentSnapshot().snapshotId()).isEqualTo(snapshotId); + assertThat(metadata.statisticsFiles()).containsExactly(statisticsFile); } - @Test + @TestTemplate public void testRemoveStatistics() { // Create a snapshot table.newFastAppend().commit(); - Assert.assertEquals("Table should be on version 1", 1, (int) version()); + assertThat(version()).isEqualTo(1); TableMetadata base = readMetadata(); long snapshotId = base.currentSnapshot().snapshotId(); @@ -113,19 +103,13 @@ public void testRemoveStatistics() { table.updateStatistics().setStatistics(snapshotId, statisticsFile).commit(); TableMetadata metadata = readMetadata(); - Assert.assertEquals("Table should be on version 2", 2, (int) version()); - Assert.assertEquals( - "Table metadata should have statistics files", - ImmutableList.of(statisticsFile), - metadata.statisticsFiles()); + assertThat(version()).isEqualTo(2); + assertThat(metadata.statisticsFiles()).containsExactly(statisticsFile); table.updateStatistics().removeStatistics(snapshotId).commit(); metadata = readMetadata(); - Assert.assertEquals("Table should be on version 3", 3, (int) version()); - Assert.assertEquals( - "Table metadata should have no statistics files", - ImmutableList.of(), - metadata.statisticsFiles()); + assertThat(version()).isEqualTo(3); + assertThat(metadata.statisticsFiles()).isEmpty(); } } diff --git a/core/src/test/java/org/apache/iceberg/TestTransaction.java b/core/src/test/java/org/apache/iceberg/TestTransaction.java index 74892dbfbf7c..393494da1283 100644 --- a/core/src/test/java/org/apache/iceberg/TestTransaction.java +++ b/core/src/test/java/org/apache/iceberg/TestTransaction.java @@ -18,8 +18,13 @@ */ package org.apache.iceberg; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static 
org.assertj.core.api.Assumptions.assumeThat; + import java.io.File; import java.io.IOException; +import java.util.Arrays; import java.util.List; import java.util.Set; import java.util.UUID; @@ -30,65 +35,54 @@ import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.types.Types; -import org.assertj.core.api.Assertions; -import org.assertj.core.api.Assumptions; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class TestTransaction extends TableTestBase { - @Parameterized.Parameters(name = "formatVersion = {0}") - public static Object[] parameters() { - return new Object[] {1, 2}; - } - - public TestTransaction(int formatVersion) { - super(formatVersion); +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(ParameterizedTestExtension.class) +public class TestTransaction extends TestBase { + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(1, 2); } - @Test + @TestTemplate public void testEmptyTransaction() { - Assert.assertEquals("Table should be on version 0", 0, (int) version()); + assertThat(version()).isEqualTo(0); TableMetadata base = readMetadata(); Transaction txn = table.newTransaction(); txn.commitTransaction(); - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 0", 0, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(0); } - @Test + @TestTemplate public void testSingleOperationTransaction() { - Assert.assertEquals("Table should be on version 0", 0, (int) version()); + assertThat(version()).isEqualTo(0); TableMetadata base = readMetadata(); Transaction txn = 
table.newTransaction(); - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 0 after txn create", 0, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(0); txn.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); - Assert.assertSame( - "Base metadata should not change when an append is committed", base, readMetadata()); - Assert.assertEquals("Table should be on version 0 after append", 0, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(0); txn.commitTransaction(); validateSnapshot(base.currentSnapshot(), readMetadata().currentSnapshot(), FILE_A, FILE_B); - Assert.assertEquals("Table should be on version 1 after commit", 1, (int) version()); + assertThat(version()).isEqualTo(1); } - @Test + @TestTemplate public void testMultipleOperationTransaction() { - Assert.assertEquals("Table should be on version 0", 0, (int) version()); + assertThat(version()).isEqualTo(0); table.newAppend().appendFile(FILE_C).commit(); List initialHistory = table.history(); @@ -97,15 +91,13 @@ public void testMultipleOperationTransaction() { Transaction txn = table.newTransaction(); - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 1 after txn create", 1, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(1); txn.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 1 after txn create", 1, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(1); Snapshot appendSnapshot = txn.table().currentSnapshot(); @@ -113,55 +105,45 @@ public void 
testMultipleOperationTransaction() { Snapshot deleteSnapshot = txn.table().currentSnapshot(); - Assert.assertSame( - "Base metadata should not change when an append is committed", base, readMetadata()); - Assert.assertEquals("Table should be on version 1 after append", 1, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(1); txn.commitTransaction(); - Assert.assertEquals("Table should be on version 2 after commit", 2, (int) version()); - Assert.assertEquals( - "Table should have two manifest after commit", - 2, - readMetadata().currentSnapshot().allManifests(table.io()).size()); - Assert.assertEquals( - "Table snapshot should be the delete snapshot", - deleteSnapshot, - readMetadata().currentSnapshot()); + assertThat(version()).isEqualTo(2); + assertThat(readMetadata().currentSnapshot().allManifests(table.io())).hasSize(2); + assertThat(readMetadata().currentSnapshot()).isEqualTo(deleteSnapshot); validateManifestEntries( readMetadata().currentSnapshot().allManifests(table.io()).get(0), ids(deleteSnapshot.snapshotId(), appendSnapshot.snapshotId()), files(FILE_A, FILE_B), statuses(Status.DELETED, Status.EXISTING)); - Assert.assertEquals( - "Table should have a snapshot for each operation", 3, readMetadata().snapshots().size()); + assertThat(readMetadata().snapshots()).hasSize(3); validateManifestEntries( readMetadata().snapshots().get(1).allManifests(table.io()).get(0), ids(appendSnapshot.snapshotId(), appendSnapshot.snapshotId()), files(FILE_A, FILE_B), statuses(Status.ADDED, Status.ADDED)); - Assertions.assertThat(table.history()).containsAll(initialHistory); + assertThat(table.history()).containsAll(initialHistory); } - @Test + @TestTemplate public void testMultipleOperationTransactionFromTable() { - Assert.assertEquals("Table should be on version 0", 0, (int) version()); + assertThat(version()).isEqualTo(0); TableMetadata base = readMetadata(); Transaction txn = table.newTransaction(); - Assert.assertSame( - "Base 
metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 0 after txn create", 0, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(0); txn.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 0 after txn create", 0, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(0); Snapshot appendSnapshot = txn.table().currentSnapshot(); @@ -169,29 +151,21 @@ public void testMultipleOperationTransactionFromTable() { Snapshot deleteSnapshot = txn.table().currentSnapshot(); - Assert.assertSame( - "Base metadata should not change when an append is committed", base, readMetadata()); - Assert.assertEquals("Table should be on version 0 after append", 0, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(0); txn.commitTransaction(); - Assert.assertEquals("Table should be on version 1 after commit", 1, (int) version()); - Assert.assertEquals( - "Table should have one manifest after commit", - 1, - readMetadata().currentSnapshot().allManifests(table.io()).size()); - Assert.assertEquals( - "Table snapshot should be the delete snapshot", - deleteSnapshot, - readMetadata().currentSnapshot()); + assertThat(version()).isEqualTo(1); + assertThat(readMetadata().currentSnapshot().allManifests(table.io())).hasSize(1); + assertThat(readMetadata().currentSnapshot()).isEqualTo(deleteSnapshot); validateManifestEntries( readMetadata().currentSnapshot().allManifests(table.io()).get(0), ids(deleteSnapshot.snapshotId(), appendSnapshot.snapshotId()), files(FILE_A, FILE_B), statuses(Status.DELETED, Status.EXISTING)); - Assert.assertEquals( - "Table should have a snapshot for each operation", 2, readMetadata().snapshots().size()); + 
assertThat(readMetadata().snapshots()).hasSize(2); validateManifestEntries( readMetadata().snapshots().get(0).allManifests(table.io()).get(0), ids(appendSnapshot.snapshotId(), appendSnapshot.snapshotId()), @@ -199,165 +173,151 @@ public void testMultipleOperationTransactionFromTable() { statuses(Status.ADDED, Status.ADDED)); } - @Test + @TestTemplate public void testDetectsUncommittedChange() { - Assert.assertEquals("Table should be on version 0", 0, (int) version()); + assertThat(version()).isEqualTo(0); TableMetadata base = readMetadata(); Transaction txn = table.newTransaction(); - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 0 after txn create", 0, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(0); txn.newAppend().appendFile(FILE_A).appendFile(FILE_B); // not committed - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 0 after txn create", 0, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(0); - Assertions.assertThatThrownBy(txn::newDelete) + assertThatThrownBy(txn::newDelete) .isInstanceOf(IllegalStateException.class) .hasMessage("Cannot create new DeleteFiles: last operation has not committed"); } - @Test + @TestTemplate public void testDetectsUncommittedChangeOnCommit() { - Assert.assertEquals("Table should be on version 0", 0, (int) version()); + assertThat(version()).isEqualTo(0); TableMetadata base = readMetadata(); Transaction txn = table.newTransaction(); - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 0 after txn create", 0, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(0); 
txn.newAppend().appendFile(FILE_A).appendFile(FILE_B); // not committed - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 0 after txn create", 0, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(0); - Assertions.assertThatThrownBy(txn::commitTransaction) + assertThatThrownBy(txn::commitTransaction) .isInstanceOf(IllegalStateException.class) .hasMessage("Cannot commit transaction: last operation has not committed"); } - @Test + @TestTemplate public void testTransactionConflict() { // set retries to 0 to catch the failure table.updateProperties().set(TableProperties.COMMIT_NUM_RETRIES, "0").commit(); - Assert.assertEquals("Table should be on version 1", 1, (int) version()); + assertThat(version()).isEqualTo(1); TableMetadata base = readMetadata(); Transaction txn = table.newTransaction(); - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 1 after txn create", 1, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(1); txn.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 1 after append", 1, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(1); // cause the transaction commit to fail table.ops().failCommits(1); - Assertions.assertThatThrownBy(txn::commitTransaction) + assertThatThrownBy(txn::commitTransaction) .isInstanceOf(CommitFailedException.class) .hasMessage("Injected failure"); } - @Test + @TestTemplate public void testTransactionRetry() { // use only one retry table.updateProperties().set(TableProperties.COMMIT_NUM_RETRIES, "1").commit(); - 
Assert.assertEquals("Table should be on version 1", 1, (int) version()); + assertThat(version()).isEqualTo(1); TableMetadata base = readMetadata(); Transaction txn = table.newTransaction(); - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 1 after txn create", 1, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(1); txn.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); Set appendManifests = Sets.newHashSet(txn.table().currentSnapshot().allManifests(table.io())); - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 1 after append", 1, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(1); // cause the transaction commit to fail table.ops().failCommits(1); txn.commitTransaction(); - Assert.assertEquals("Table should be on version 2 after commit", 2, (int) version()); + assertThat(version()).isEqualTo(2); - Assert.assertEquals( - "Should reuse manifests from initial append commit", - appendManifests, - Sets.newHashSet(table.currentSnapshot().allManifests(table.io()))); + assertThat(Sets.newHashSet(table.currentSnapshot().allManifests(table.io()))) + .isEqualTo(appendManifests); } - @Test + @TestTemplate public void testTransactionRetryMergeAppend() { // use only one retry table.updateProperties().set(TableProperties.COMMIT_NUM_RETRIES, "1").commit(); - Assert.assertEquals("Table should be on version 1", 1, (int) version()); + assertThat(version()).isEqualTo(1); TableMetadata base = readMetadata(); Transaction txn = table.newTransaction(); - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 1 after txn create", 1, (int) version()); + 
assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(1); txn.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); Set appendManifests = Sets.newHashSet(txn.table().currentSnapshot().allManifests(table.io())); - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 1 after append", 1, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(1); // cause the transaction commit to fail table.newAppend().appendFile(FILE_C).appendFile(FILE_D).commit(); - Assert.assertEquals("Table should be on version 2 after real append", 2, (int) version()); + assertThat(version()).isEqualTo(2); Set conflictAppendManifests = Sets.newHashSet(table.currentSnapshot().allManifests(table.io())); txn.commitTransaction(); - Assert.assertEquals("Table should be on version 3 after commit", 3, (int) version()); + assertThat(version()).isEqualTo(3); Set expectedManifests = Sets.newHashSet(); expectedManifests.addAll(appendManifests); expectedManifests.addAll(conflictAppendManifests); - Assert.assertEquals( - "Should reuse manifests from initial append commit and conflicting append", - expectedManifests, - Sets.newHashSet(table.currentSnapshot().allManifests(table.io()))); + assertThat(Sets.newHashSet(table.currentSnapshot().allManifests(table.io()))) + .isEqualTo(expectedManifests); } - @Test + @TestTemplate public void testMultipleUpdateTransactionRetryMergeCleanup() { // use only one retry and aggressively merge manifests table @@ -366,59 +326,49 @@ public void testMultipleUpdateTransactionRetryMergeCleanup() { .set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "0") .commit(); - Assert.assertEquals("Table should be on version 1", 1, (int) version()); + assertThat(version()).isEqualTo(1); TableMetadata base = readMetadata(); Transaction txn = table.newTransaction(); - Assert.assertSame( - "Base metadata should not change when commit 
is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 1 after txn create", 1, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(1); txn.updateProperties().set("test-property", "test-value").commit(); txn.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); - Assert.assertEquals( - "Append should create one manifest", - 1, - txn.table().currentSnapshot().allManifests(table.io()).size()); + assertThat(txn.table().currentSnapshot().allManifests(table.io())).hasSize(1); ManifestFile appendManifest = txn.table().currentSnapshot().allManifests(table.io()).get(0); - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 1 after append", 1, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(1); // cause the transaction commit to fail table.newAppend().appendFile(FILE_C).appendFile(FILE_D).commit(); - Assert.assertEquals("Table should be on version 2 after real append", 2, (int) version()); + assertThat(version()).isEqualTo(2); Set conflictAppendManifests = Sets.newHashSet(table.currentSnapshot().allManifests(table.io())); txn.commitTransaction(); - Assert.assertEquals("Table should be on version 3 after commit", 3, (int) version()); + assertThat(version()).isEqualTo(3); Set previousManifests = Sets.newHashSet(); previousManifests.add(appendManifest); previousManifests.addAll(conflictAppendManifests); - Assert.assertEquals( - "Should merge both commit manifests into a single manifest", - 1, - table.currentSnapshot().allManifests(table.io()).size()); - Assert.assertFalse( - "Should merge both commit manifests into a new manifest", - previousManifests.contains(table.currentSnapshot().allManifests(table.io()).get(0))); + assertThat(table.currentSnapshot().allManifests(table.io())) + .hasSize(1) + .doesNotContainAnyElementsOf(previousManifests); - 
Assert.assertFalse( - "Append manifest should be deleted", new File(appendManifest.path()).exists()); + assertThat(new File(appendManifest.path())).doesNotExist(); } - @Test + @TestTemplate public void testTransactionRetrySchemaUpdate() { // use only one retry table.updateProperties().set(TableProperties.COMMIT_NUM_RETRIES, "1").commit(); @@ -434,18 +384,15 @@ public void testTransactionRetrySchemaUpdate() { table.updateSchema().addColumn("another-column", Types.IntegerType.get()).commit(); int conflictingSchemaId = table.schema().schemaId(); - Assert.assertEquals( - "Both schema IDs should be the same in order to cause a conflict", - conflictingSchemaId, - schemaId); + assertThat(schemaId).isEqualTo(conflictingSchemaId); // commit the transaction for adding "new-column" - Assertions.assertThatThrownBy(txn::commitTransaction) + assertThatThrownBy(txn::commitTransaction) .isInstanceOf(CommitFailedException.class) .hasMessage("Table metadata refresh is required"); } - @Test + @TestTemplate public void testTransactionRetryMergeCleanup() { // use only one retry and aggressively merge manifests table @@ -454,61 +401,50 @@ public void testTransactionRetryMergeCleanup() { .set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "0") .commit(); - Assert.assertEquals("Table should be on version 1", 1, (int) version()); + assertThat(version()).isEqualTo(1); TableMetadata base = readMetadata(); Transaction txn = table.newTransaction(); - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 1 after txn create", 1, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(1); txn.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); - Assert.assertEquals( - "Append should create one manifest", - 1, - txn.table().currentSnapshot().allManifests(table.io()).size()); + assertThat(txn.table().currentSnapshot().allManifests(table.io())).hasSize(1); 
ManifestFile appendManifest = txn.table().currentSnapshot().allManifests(table.io()).get(0); - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 1 after append", 1, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(1); // cause the transaction commit to fail table.newAppend().appendFile(FILE_C).appendFile(FILE_D).commit(); - Assert.assertEquals("Table should be on version 2 after real append", 2, (int) version()); + assertThat(version()).isEqualTo(2); Set conflictAppendManifests = Sets.newHashSet(table.currentSnapshot().allManifests(table.io())); txn.commitTransaction(); - Assert.assertEquals("Table should be on version 3 after commit", 3, (int) version()); + assertThat(version()).isEqualTo(3); Set previousManifests = Sets.newHashSet(); previousManifests.add(appendManifest); previousManifests.addAll(conflictAppendManifests); - Assert.assertEquals( - "Should merge both commit manifests into a single manifest", - 1, - table.currentSnapshot().allManifests(table.io()).size()); - Assert.assertFalse( - "Should merge both commit manifests into a new manifest", - previousManifests.contains(table.currentSnapshot().allManifests(table.io()).get(0))); - - Assert.assertFalse( - "Append manifest should be deleted", new File(appendManifest.path()).exists()); + assertThat(table.currentSnapshot().allManifests(table.io())) + .hasSize(1) + .doesNotContainAnyElementsOf(previousManifests); + assertThat(new File(appendManifest.path())).doesNotExist(); } - @Test + @TestTemplate public void testTransactionRetryAndAppendManifestsWithoutSnapshotIdInheritance() throws Exception { // this test assumes append manifests are rewritten, which only happens in V1 tables - Assumptions.assumeThat(formatVersion).isEqualTo(1); + assumeThat(formatVersion).isEqualTo(1); // use only one retry and aggressively merge manifests table @@ -517,22 +453,18 @@ public 
void testTransactionRetryAndAppendManifestsWithoutSnapshotIdInheritance() .set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "0") .commit(); - Assert.assertEquals("Table should be on version 1", 1, (int) version()); + assertThat(version()).isEqualTo(1); table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); - Assert.assertEquals("Table should be on version 2 after append", 2, (int) version()); - Assert.assertEquals( - "Append should create one manifest", - 1, - table.currentSnapshot().allManifests(table.io()).size()); + assertThat(version()).isEqualTo(2); + assertThat(table.currentSnapshot().allManifests(table.io())).hasSize(1); ManifestFile v1manifest = table.currentSnapshot().allManifests(table.io()).get(0); TableMetadata base = readMetadata(); // create a manifest append - OutputFile manifestLocation = - Files.localOutput("/tmp/" + UUID.randomUUID().toString() + ".avro"); + OutputFile manifestLocation = Files.localOutput("/tmp/" + UUID.randomUUID() + ".avro"); ManifestWriter writer = ManifestFiles.write(table.spec(), manifestLocation); try { writer.add(FILE_D); @@ -544,14 +476,10 @@ public void testTransactionRetryAndAppendManifestsWithoutSnapshotIdInheritance() txn.newAppend().appendManifest(writer.toManifestFile()).commit(); - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 2 after txn create", 2, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(2); - Assert.assertEquals( - "Append should have one merged manifest", - 1, - txn.table().currentSnapshot().allManifests(table.io()).size()); + assertThat(txn.table().currentSnapshot().allManifests(table.io())).hasSize(1); ManifestFile mergedManifest = txn.table().currentSnapshot().allManifests(table.io()).get(0); // find the initial copy of the appended manifest @@ -562,40 +490,34 @@ public void 
testTransactionRetryAndAppendManifestsWithoutSnapshotIdInheritance() path -> !v1manifest.path().contains(path) && !mergedManifest.path().contains(path))); - Assert.assertTrue( - "Transaction should hijack the delete of the original copied manifest", - ((BaseTransaction) txn).deletedFiles().contains(copiedAppendManifest)); - Assert.assertTrue( - "Copied append manifest should not be deleted yet", - new File(copiedAppendManifest).exists()); + assertThat(((BaseTransaction) txn).deletedFiles()) + .as("Transaction should hijack the delete of the original copied manifest") + .contains(copiedAppendManifest); + assertThat(new File(copiedAppendManifest)).exists(); // cause the transaction commit to fail and retry table.newAppend().appendFile(FILE_C).commit(); - Assert.assertEquals("Table should be on version 3 after real append", 3, (int) version()); + assertThat(version()).isEqualTo(3); txn.commitTransaction(); - Assert.assertEquals("Table should be on version 4 after commit", 4, (int) version()); - - Assert.assertTrue( - "Transaction should hijack the delete of the original copied manifest", - ((BaseTransaction) txn).deletedFiles().contains(copiedAppendManifest)); - Assert.assertFalse( - "Append manifest should be deleted", new File(copiedAppendManifest).exists()); - Assert.assertTrue( - "Transaction should hijack the delete of the first merged manifest", - ((BaseTransaction) txn).deletedFiles().contains(mergedManifest.path())); - Assert.assertFalse( - "Append manifest should be deleted", new File(mergedManifest.path()).exists()); - - Assert.assertEquals( - "Should merge all commit manifests into a single manifest", - 1, - table.currentSnapshot().allManifests(table.io()).size()); + assertThat(version()).isEqualTo(4); + + assertThat(((BaseTransaction) txn).deletedFiles()) + .as("Transaction should hijack the delete of the original copied manifest") + .contains(copiedAppendManifest); + + assertThat(new File(copiedAppendManifest)).doesNotExist(); + 
assertThat(((BaseTransaction) txn).deletedFiles()) + .as("Transaction should hijack the delete of the first merged manifest") + .contains(mergedManifest.path()); + assertThat(new File(mergedManifest.path())).doesNotExist(); + + assertThat(table.currentSnapshot().allManifests(table.io())).hasSize(1); } - @Test + @TestTemplate public void testTransactionRetryAndAppendManifestsWithSnapshotIdInheritance() throws Exception { // use only one retry and aggressively merge manifests table @@ -605,15 +527,12 @@ public void testTransactionRetryAndAppendManifestsWithSnapshotIdInheritance() th .set(TableProperties.SNAPSHOT_ID_INHERITANCE_ENABLED, "true") .commit(); - Assert.assertEquals("Table should be on version 1", 1, (int) version()); + assertThat(version()).isEqualTo(1); table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); - Assert.assertEquals("Table should be on version 2 after append", 2, (int) version()); - Assert.assertEquals( - "Append should create one manifest", - 1, - table.currentSnapshot().allManifests(table.io()).size()); + assertThat(version()).isEqualTo(2); + assertThat(table.currentSnapshot().allManifests(table.io())).hasSize(1); TableMetadata base = readMetadata(); @@ -622,46 +541,37 @@ public void testTransactionRetryAndAppendManifestsWithSnapshotIdInheritance() th ManifestFile appendManifest = writeManifestWithName("input.m0", FILE_D); txn.newAppend().appendManifest(appendManifest).commit(); - Assert.assertSame( - "Base metadata should not change when commit is created", base, readMetadata()); - Assert.assertEquals("Table should be on version 2 after txn create", 2, (int) version()); + assertThat(readMetadata()).isSameAs(base); + assertThat(version()).isEqualTo(2); - Assert.assertEquals( - "Append should have one merged manifest", - 1, - txn.table().currentSnapshot().allManifests(table.io()).size()); + assertThat(txn.table().currentSnapshot().allManifests(table.io())).hasSize(1); ManifestFile mergedManifest = 
txn.table().currentSnapshot().allManifests(table.io()).get(0); // cause the transaction commit to fail and retry table.newAppend().appendFile(FILE_C).commit(); - Assert.assertEquals("Table should be on version 3 after real append", 3, (int) version()); + assertThat(version()).isEqualTo(3); txn.commitTransaction(); - Assert.assertEquals("Table should be on version 4 after commit", 4, (int) version()); + assertThat(version()).isEqualTo(4); - Assert.assertTrue( - "Transaction should hijack the delete of the original append manifest", - ((BaseTransaction) txn).deletedFiles().contains(appendManifest.path())); - Assert.assertFalse( - "Append manifest should be deleted", new File(appendManifest.path()).exists()); + assertThat(((BaseTransaction) txn).deletedFiles()) + .as("Transaction should hijack the delete of the original append manifest") + .contains(appendManifest.path()); + assertThat(new File(appendManifest.path())).doesNotExist(); - Assert.assertTrue( - "Transaction should hijack the delete of the first merged manifest", - ((BaseTransaction) txn).deletedFiles().contains(mergedManifest.path())); - Assert.assertFalse( - "Merged append manifest should be deleted", new File(mergedManifest.path()).exists()); + assertThat(((BaseTransaction) txn).deletedFiles()) + .as("Transaction should hijack the delete of the first merged manifest") + .contains(mergedManifest.path()); + assertThat(new File(mergedManifest.path())).doesNotExist(); - Assert.assertEquals( - "Should merge all commit manifests into a single manifest", - 1, - table.currentSnapshot().allManifests(table.io()).size()); + assertThat(table.currentSnapshot().allManifests(table.io())).hasSize(1); } - @Test + @TestTemplate public void testTransactionNoCustomDeleteFunc() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> table .newTransaction() @@ -673,7 +583,7 @@ public void testTransactionNoCustomDeleteFunc() { .hasMessage("Cannot set delete callback more than once"); } - @Test + @TestTemplate public
void testTransactionFastAppends() { table.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "0").commit(); @@ -686,10 +596,10 @@ public void testTransactionFastAppends() { txn.commitTransaction(); List manifests = table.currentSnapshot().allManifests(table.io()); - Assert.assertEquals("Expected 2 manifests", 2, manifests.size()); + assertThat(manifests).hasSize(2); } - @Test + @TestTemplate public void testTransactionRewriteManifestsAppendedDirectly() throws IOException { Table table = load(); @@ -706,7 +616,7 @@ public void testTransactionRewriteManifestsAppendedDirectly() throws IOException long secondSnapshotId = table.currentSnapshot().snapshotId(); List manifests = table.currentSnapshot().allManifests(table.io()); - Assert.assertEquals("Should have 2 manifests after 2 appends", 2, manifests.size()); + assertThat(manifests).hasSize(2); ManifestFile newManifest = writeManifest( @@ -726,11 +636,10 @@ public void testTransactionRewriteManifestsAppendedDirectly() throws IOException long finalSnapshotId = table.currentSnapshot().snapshotId(); long finalSnapshotTimestamp = System.currentTimeMillis(); - Assert.assertTrue( - "Append manifest should not be deleted", new File(newManifest.path()).exists()); + assertThat(new File(newManifest.path())).exists(); List finalManifests = table.currentSnapshot().allManifests(table.io()); - Assert.assertEquals("Should have 1 final manifest", 1, finalManifests.size()); + assertThat(finalManifests).hasSize(1); validateManifestEntries( finalManifests.get(0), @@ -743,30 +652,29 @@ public void testTransactionRewriteManifestsAppendedDirectly() throws IOException table.expireSnapshots().expireOlderThan(finalSnapshotTimestamp + 1).retainLast(1).commit(); - Assert.assertFalse( - "Append manifest should be deleted on expiry", new File(newManifest.path()).exists()); + assertThat(new File(newManifest.path())).doesNotExist(); } - @Test + @TestTemplate public void testSimpleTransactionNotDeletingMetadataOnUnknownSate() throws 
IOException { Table table = TestTables.tableWithCommitSucceedButStateUnknown(tableDir, "test"); Transaction transaction = table.newTransaction(); transaction.newAppend().appendFile(FILE_A).commit(); - Assertions.assertThatThrownBy(transaction::commitTransaction) + assertThatThrownBy(transaction::commitTransaction) .isInstanceOf(CommitStateUnknownException.class) .hasMessageStartingWith("datacenter on fire"); // Make sure metadata files still exist Snapshot current = table.currentSnapshot(); List manifests = current.allManifests(table.io()); - Assert.assertEquals("Should have 1 manifest file", 1, manifests.size()); - Assert.assertTrue("Manifest file should exist", new File(manifests.get(0).path()).exists()); - Assert.assertEquals("Should have 2 files in metadata", 2, countAllMetadataFiles(tableDir)); + assertThat(manifests).hasSize(1); + assertThat(new File(manifests.get(0).path())).exists(); + assertThat(countAllMetadataFiles(tableDir)).isEqualTo(2); } - @Test + @TestTemplate public void testTransactionRecommit() { // update table settings to merge when there are 3 manifests table.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "3").commit(); @@ -781,18 +689,14 @@ public void testTransactionRecommit() { AppendFiles append = transaction.newAppend().appendFile(FILE_D); Snapshot pending = append.apply(); - Assert.assertEquals( - "Should produce 1 pending merged manifest", 1, pending.allManifests(table.io()).size()); + assertThat(pending.allManifests(table.io())).hasSize(1); // because a merge happened, the appended manifest is deleted the by append operation append.commit(); // concurrently commit FILE_A without a transaction to cause the previous append to retry table.newAppend().appendFile(FILE_C).commit(); - Assert.assertEquals( - "Should produce 1 committed merged manifest", - 1, - table.currentSnapshot().allManifests(table.io()).size()); + assertThat(table.currentSnapshot().allManifests(table.io())).hasSize(1); transaction.commitTransaction(); @@ 
-807,9 +711,7 @@ public void testTransactionRecommit() { FILE_C.path().toString(), FILE_D.path().toString()); - Assert.assertEquals("Should contain all committed files", expectedPaths, paths); - - Assert.assertEquals( - "Should produce 2 manifests", 2, table.currentSnapshot().allManifests(table.io()).size()); + assertThat(paths).isEqualTo(expectedPaths); + assertThat(table.currentSnapshot().allManifests(table.io())).hasSize(2); } } From f8d60ea993c5e87d377dbc07e32cc6d0f28914d4 Mon Sep 17 00:00:00 2001 From: Jay Chia <17691182+jaychia@users.noreply.github.com> Date: Wed, 20 Mar 2024 04:01:21 -0700 Subject: [PATCH 17/23] Docs: Add Daft into Iceberg documentation (#9836) --- docs/docs/daft.md | 145 ++++++++++++++++++++++++++++++++++++++++++++++ docs/mkdocs.yml | 1 + 2 files changed, 146 insertions(+) create mode 100644 docs/docs/daft.md diff --git a/docs/docs/daft.md b/docs/docs/daft.md new file mode 100644 index 000000000000..da78b7eb6ccd --- /dev/null +++ b/docs/docs/daft.md @@ -0,0 +1,145 @@ +--- +title: "Daft" +--- + + +# Daft + +[Daft](https://www.getdaft.io) is a distributed query engine written in Python and Rust, two fast-growing ecosystems in the data engineering and machine learning industry. + +It exposes its flavor of the familiar [Python DataFrame API](https://www.getdaft.io/projects/docs/en/latest/api_docs/dataframe.html) which is a common abstraction over querying tables of data in the Python data ecosystem. + +Daft DataFrames are a powerful interface to power use-cases across ML/AI training, batch inference, feature engineering and traditional analytics. Daft's tight integration with Iceberg unlocks novel capabilities for both traditional analytics and Pythonic ML workloads on your data catalog. + +## Enabling Iceberg support in Daft + +[PyIceberg](https://py.iceberg.apache.org/) supports reading of Iceberg tables into Daft DataFrames. 
+ +To use Iceberg with Daft, ensure that the [PyIceberg](https://py.iceberg.apache.org/) library is also installed in your current Python environment. + +``` +pip install getdaft pyiceberg +``` + +## Querying Iceberg using Daft + +Daft interacts natively with [PyIceberg](https://py.iceberg.apache.org/) to read Iceberg tables. + +### Reading Iceberg tables + +**Setup Steps** + +To follow along with this code, first create an Iceberg table following [the Spark Quickstart tutorial](https://iceberg.apache.org/spark-quickstart/). PyIceberg must then be correctly configured by ensuring that the `~/.pyiceberg.yaml` file contains an appropriate catalog entry: + +``` +catalog: + default: + # URL to the Iceberg REST server Docker container + uri: http://localhost:8181 + # URL and credentials for the MinIO Docker container + s3.endpoint: http://localhost:9000 + s3.access-key-id: admin + s3.secret-access-key: password +``` + +Here is how the Iceberg table `demo.nyc.taxis` can be loaded into Daft: + +``` py +import daft +from pyiceberg.catalog import load_catalog + +# Configure Daft to use the local MinIO Docker container for any S3 operations +daft.set_planning_config( + default_io_config=daft.io.IOConfig( + s3=daft.io.S3Config(endpoint_url="http://localhost:9000"), + ) +) + +# Load a PyIceberg table into Daft, and show the first few rows +table = load_catalog("default").load_table("nyc.taxis") +df = daft.read_iceberg(table) +df.show() +``` + +``` +╭───────────┬─────────┬───────────────┬─────────────┬────────────────────╮ +│ vendor_id ┆ trip_id ┆ trip_distance ┆ fare_amount ┆ store_and_fwd_flag │ +│ --- ┆ --- ┆ --- ┆ --- ┆ --- │ +│ Int64 ┆ Int64 ┆ Float32 ┆ Float64 ┆ Utf8 │ +╞═══════════╪═════════╪═══════════════╪═════════════╪════════════════════╡ +│ 1 ┆ 1000371 ┆ 1.8 ┆ 15.32 ┆ N │ +├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 1 ┆ 1000374 ┆ 8.4 ┆ 42.13 ┆ Y │ +├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 2 ┆ 
1000372 ┆ 2.5 ┆ 22.15 ┆ N │ +├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 2 ┆ 1000373 ┆ 0.9 ┆ 9.01 ┆ N │ +╰───────────┴─────────┴───────────────┴─────────────┴────────────────────╯ + +(Showing first 4 of 4 rows) +``` + +Note that the operation above will produce a warning from PyIceberg that "no partition filter was specified" and that "this will result in a full table scan". Any filter operations on the Daft dataframe, `df`, will [push down the filters](https://iceberg.apache.org/docs/latest/performance/#data-filtering), correctly account for [hidden partitioning](https://iceberg.apache.org/docs/latest/partitioning/), and utilize [table statistics to inform query planning](https://iceberg.apache.org/docs/latest/performance/#scan-planning) for efficient reads. + +Let's try the above query again, but this time with a filter applied on the table's partition column `"vendor_id"` which Daft will correctly use to elide a full table scan. + +``` py +df = df.where(df["vendor_id"] > 1) +df.show() +``` + +``` +╭───────────┬─────────┬───────────────┬─────────────┬────────────────────╮ +│ vendor_id ┆ trip_id ┆ trip_distance ┆ fare_amount ┆ store_and_fwd_flag │ +│ --- ┆ --- ┆ --- ┆ --- ┆ --- │ +│ Int64 ┆ Int64 ┆ Float32 ┆ Float64 ┆ Utf8 │ +╞═══════════╪═════════╪═══════════════╪═════════════╪════════════════════╡ +│ 2 ┆ 1000372 ┆ 2.5 ┆ 22.15 ┆ N │ +├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ 2 ┆ 1000373 ┆ 0.9 ┆ 9.01 ┆ N │ +╰───────────┴─────────┴───────────────┴─────────────┴────────────────────╯ + +(Showing first 2 of 2 rows) +``` + +### Type compatibility + +Daft and Iceberg have compatible type systems. Here are how types are converted across the two systems. 
+ + +| Iceberg | Daft | +|---------|------| +| **Primitive Types** | +| `boolean` | [`daft.DataType.bool()`](https://www.getdaft.io/projects/docs/en/latest/api_docs/datatype.html#daft.DataType.bool) | +| `int` | [`daft.DataType.int32()`](https://www.getdaft.io/projects/docs/en/latest/api_docs/datatype.html#daft.DataType.int32) | +| `long` | [`daft.DataType.int64()`](https://www.getdaft.io/projects/docs/en/latest/api_docs/datatype.html#daft.DataType.int64) | +| `float` | [`daft.DataType.float32()`](https://www.getdaft.io/projects/docs/en/latest/api_docs/datatype.html#daft.DataType.float32) | +| `double` | [`daft.DataType.float64()`](https://www.getdaft.io/projects/docs/en/latest/api_docs/datatype.html#daft.DataType.float64) | +| `decimal(precision, scale)` | [`daft.DataType.decimal128(precision, scale)`](https://www.getdaft.io/projects/docs/en/latest/api_docs/datatype.html#daft.DataType.decimal128) | +| `date` | [`daft.DataType.date()`](https://www.getdaft.io/projects/docs/en/latest/api_docs/datatype.html#daft.DataType.date) | +| `time` | [`daft.DataType.time(timeunit="us")`](https://www.getdaft.io/projects/docs/en/latest/api_docs/datatype.html#daft.DataType.time) | +| `timestamp` | [`daft.DataType.timestamp(timeunit="us", timezone=None)`](https://www.getdaft.io/projects/docs/en/latest/api_docs/datatype.html#daft.DataType.timestamp) | +| `timestamptz` | [`daft.DataType.timestamp(timeunit="us", timezone="UTC")`](https://www.getdaft.io/projects/docs/en/latest/api_docs/datatype.html#daft.DataType.timestamp) | +| `string` | [`daft.DataType.string()`](https://www.getdaft.io/projects/docs/en/latest/api_docs/datatype.html#daft.DataType.string) | +| `uuid` | [`daft.DataType.binary()`](https://www.getdaft.io/projects/docs/en/latest/api_docs/datatype.html#daft.DataType.binary) | +| `fixed(L)` | [`daft.DataType.binary()`](https://www.getdaft.io/projects/docs/en/latest/api_docs/datatype.html#daft.DataType.binary) | +| `binary` | 
[`daft.DataType.binary()`](https://www.getdaft.io/projects/docs/en/latest/api_docs/datatype.html#daft.DataType.binary) | +| **Nested Types** | +| `struct(**fields)` | [`daft.DataType.struct(**fields)`](https://www.getdaft.io/projects/docs/en/latest/api_docs/datatype.html#daft.DataType.struct) | +| `list(child_type)` | [`daft.DataType.list(child_type)`](https://www.getdaft.io/projects/docs/en/latest/api_docs/datatype.html#daft.DataType.list) | +| `map(K, V)` | [`daft.DataType.map(K, V)`](https://www.getdaft.io/projects/docs/en/latest/api_docs/datatype.html#daft.DataType.map) | diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index a8804ed04dc4..a803f4585e41 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -51,6 +51,7 @@ nav: - flink-configuration.md - hive.md - Trino: https://trino.io/docs/current/connector/iceberg.html + - Daft: daft.md - Clickhouse: https://clickhouse.com/docs/en/engines/table-engines/integrations/iceberg - Presto: https://prestodb.io/docs/current/connector/iceberg.html - Dremio: https://docs.dremio.com/data-formats/apache-iceberg/ From aa17c0a3090a0843fd7e3111a5285d6e23d24dcd Mon Sep 17 00:00:00 2001 From: Tom Tanaka <43331405+tomtongue@users.noreply.github.com> Date: Thu, 21 Mar 2024 00:30:19 +0900 Subject: [PATCH 18/23] Core: Migrate tests to JUnit5 (#9994) --- .../org/apache/iceberg/TestDeleteFiles.java | 115 ++-- .../org/apache/iceberg/TestFastAppend.java | 307 ++++------ .../org/apache/iceberg/TestMergeAppend.java | 561 +++++++----------- .../org/apache/iceberg/TestOverwrite.java | 104 ++-- .../iceberg/TestOverwriteWithValidation.java | 226 ++++--- .../org/apache/iceberg/TestRewriteFiles.java | 304 +++++----- 6 files changed, 675 insertions(+), 942 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/TestDeleteFiles.java b/core/src/test/java/org/apache/iceberg/TestDeleteFiles.java index 63fc7010c49c..18e3de240170 100644 --- a/core/src/test/java/org/apache/iceberg/TestDeleteFiles.java +++ 
b/core/src/test/java/org/apache/iceberg/TestDeleteFiles.java @@ -19,30 +19,29 @@ package org.apache.iceberg; import static org.apache.iceberg.util.SnapshotUtil.latestSnapshot; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.assertj.core.api.Assumptions.assumeThat; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; import org.apache.iceberg.ManifestEntry.Status; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.Expressions; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.StructLikeWrapper; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; -@RunWith(Parameterized.class) -public class TestDeleteFiles extends TableTestBase { +@ExtendWith(ParameterizedTestExtension.class) +public class TestDeleteFiles extends TestBase { private static final DataFile DATA_FILE_BUCKET_0_IDS_0_2 = DataFiles.builder(SPEC) @@ -78,36 +77,31 @@ public class TestDeleteFiles extends TableTestBase { )) .build(); - private final String branch; + @Parameter(index = 1) + private String branch; - @Parameterized.Parameters(name = "formatVersion = {0}, branch = {1}") - public static Object[] parameters() { - return new Object[][] { - new Object[] {1, "main"}, - new Object[] {1, 
"testBranch"}, - new Object[] {2, "main"}, - new Object[] {2, "testBranch"} - }; + @Parameters(name = "formatVersion = {0}, branch = {1}") + protected static List parameters() { + return Arrays.asList( + new Object[] {1, "main"}, + new Object[] {1, "testBranch"}, + new Object[] {2, "main"}, + new Object[] {2, "testBranch"}); } - public TestDeleteFiles(int formatVersion, String branch) { - super(formatVersion); - this.branch = branch; - } - - @Test + @TestTemplate public void testMultipleDeletes() { commit( table, table.newAppend().appendFile(FILE_A).appendFile(FILE_B).appendFile(FILE_C), branch); Snapshot append = latestSnapshot(readMetadata(), branch); - Assert.assertEquals("Metadata should be at version 1", 1L, (long) version()); + assertThat(version()).isEqualTo(1); validateSnapshot(null, append, FILE_A, FILE_B, FILE_C); commit(table, table.newDelete().deleteFile(FILE_A), branch); Snapshot delete1 = latestSnapshot(readMetadata(), branch); - Assert.assertEquals("Metadata should be at version 2", 2L, (long) version()); - Assert.assertEquals("Should have 1 manifest", 1, delete1.allManifests(FILE_IO).size()); + assertThat(version()).isEqualTo(2); + assertThat(delete1.allManifests(FILE_IO)).hasSize(1); validateManifestEntries( delete1.allManifests(table.io()).get(0), ids(delete1.snapshotId(), append.snapshotId(), append.snapshotId()), @@ -115,8 +109,8 @@ public void testMultipleDeletes() { statuses(Status.DELETED, Status.EXISTING, Status.EXISTING)); Snapshot delete2 = commit(table, table.newDelete().deleteFile(FILE_B), branch); - Assert.assertEquals("Metadata should be at version 3", 3L, (long) version()); - Assert.assertEquals("Should have 1 manifest", 1, delete2.allManifests(FILE_IO).size()); + assertThat(version()).isEqualTo(3); + assertThat(delete2.allManifests(FILE_IO)).hasSize(1); validateManifestEntries( delete2.allManifests(FILE_IO).get(0), ids(delete2.snapshotId(), append.snapshotId()), @@ -124,7 +118,7 @@ public void testMultipleDeletes() { 
statuses(Status.DELETED, Status.EXISTING)); } - @Test + @TestTemplate public void testAlreadyDeletedFilesAreIgnoredDuringDeletesByRowFilter() { PartitionSpec spec = table.spec(); @@ -169,7 +163,7 @@ public void testAlreadyDeletedFilesAreIgnoredDuringDeletesByRowFilter() { table.newFastAppend().appendFile(firstDataFile).appendFile(secondDataFile), branch); - Assert.assertEquals("Should have 1 manifest", 1, initialSnapshot.allManifests(FILE_IO).size()); + assertThat(initialSnapshot.allManifests(FILE_IO)).hasSize(1); validateManifestEntries( initialSnapshot.allManifests(FILE_IO).get(0), ids(initialSnapshot.snapshotId(), initialSnapshot.snapshotId()), @@ -178,7 +172,7 @@ public void testAlreadyDeletedFilesAreIgnoredDuringDeletesByRowFilter() { // delete the first data file Snapshot deleteSnapshot = commit(table, table.newDelete().deleteFile(firstDataFile), branch); - Assert.assertEquals("Should have 1 manifest", 1, deleteSnapshot.allManifests(FILE_IO).size()); + assertThat(deleteSnapshot.allManifests(FILE_IO)).hasSize(1); validateManifestEntries( deleteSnapshot.allManifests(FILE_IO).get(0), ids(deleteSnapshot.snapshotId(), initialSnapshot.snapshotId()), @@ -190,7 +184,7 @@ public void testAlreadyDeletedFilesAreIgnoredDuringDeletesByRowFilter() { Snapshot finalSnapshot = commit(table, table.newDelete().deleteFromRowFilter(Expressions.lessThan("id", 7)), branch); - Assert.assertEquals("Should have 1 manifest", 1, finalSnapshot.allManifests(FILE_IO).size()); + assertThat(finalSnapshot.allManifests(FILE_IO)).hasSize(1); validateManifestEntries( finalSnapshot.allManifests(FILE_IO).get(0), ids(finalSnapshot.snapshotId()), @@ -198,7 +192,7 @@ public void testAlreadyDeletedFilesAreIgnoredDuringDeletesByRowFilter() { statuses(Status.DELETED)); } - @Test + @TestTemplate public void testDeleteSomeFilesByRowFilterWithoutPartitionPredicates() { // add both data files Snapshot initialSnapshot = @@ -210,7 +204,7 @@ public void 
testDeleteSomeFilesByRowFilterWithoutPartitionPredicates() { .appendFile(DATA_FILE_BUCKET_0_IDS_8_10), branch); - Assert.assertEquals("Should have 1 manifest", 1, initialSnapshot.allManifests(FILE_IO).size()); + assertThat(initialSnapshot.allManifests(FILE_IO)).hasSize(1); validateManifestEntries( initialSnapshot.allManifests(FILE_IO).get(0), ids(initialSnapshot.snapshotId(), initialSnapshot.snapshotId()), @@ -222,7 +216,7 @@ public void testDeleteSomeFilesByRowFilterWithoutPartitionPredicates() { commit( table, table.newDelete().deleteFromRowFilter(Expressions.greaterThan("id", 5)), branch); - Assert.assertEquals("Should have 1 manifest", 1, deleteSnapshot.allManifests(FILE_IO).size()); + assertThat(deleteSnapshot.allManifests(FILE_IO)).hasSize(1); validateManifestEntries( deleteSnapshot.allManifests(FILE_IO).get(0), ids(initialSnapshot.snapshotId(), deleteSnapshot.snapshotId()), @@ -230,7 +224,7 @@ public void testDeleteSomeFilesByRowFilterWithoutPartitionPredicates() { statuses(Status.EXISTING, Status.DELETED)); } - @Test + @TestTemplate public void testDeleteSomeFilesByRowFilterWithCombinedPredicates() { // add both data files Snapshot initialSnapshot = @@ -242,7 +236,7 @@ public void testDeleteSomeFilesByRowFilterWithCombinedPredicates() { .appendFile(DATA_FILE_BUCKET_0_IDS_8_10), branch); - Assert.assertEquals("Should have 1 manifest", 1, initialSnapshot.allManifests(FILE_IO).size()); + assertThat(initialSnapshot.allManifests(FILE_IO)).hasSize(1); validateManifestEntries( initialSnapshot.allManifests(FILE_IO).get(0), ids(initialSnapshot.snapshotId(), initialSnapshot.snapshotId()), @@ -255,7 +249,7 @@ public void testDeleteSomeFilesByRowFilterWithCombinedPredicates() { Expression predicate = Expressions.and(partPredicate, rowPredicate); Snapshot deleteSnapshot = commit(table, table.newDelete().deleteFromRowFilter(predicate), branch); - Assert.assertEquals("Should have 1 manifest", 1, deleteSnapshot.allManifests(FILE_IO).size()); + 
assertThat(deleteSnapshot.allManifests(FILE_IO)).hasSize(1); validateManifestEntries( deleteSnapshot.allManifests(FILE_IO).get(0), ids(initialSnapshot.snapshotId(), deleteSnapshot.snapshotId()), @@ -263,9 +257,9 @@ public void testDeleteSomeFilesByRowFilterWithCombinedPredicates() { statuses(Status.EXISTING, Status.DELETED)); } - @Test + @TestTemplate public void testCannotDeleteFileWhereNotAllRowsMatchPartitionFilter() { - Assume.assumeTrue(formatVersion == 2); + assumeThat(formatVersion).isEqualTo(2); table .updateSpec() @@ -285,7 +279,7 @@ public void testCannotDeleteFileWhereNotAllRowsMatchPartitionFilter() { commit(table, table.newFastAppend().appendFile(dataFile), branch); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit( table, @@ -295,18 +289,18 @@ public void testCannotDeleteFileWhereNotAllRowsMatchPartitionFilter() { .hasMessageStartingWith("Cannot delete file where some, but not all, rows match filter"); } - @Test + @TestTemplate public void testDeleteCaseSensitivity() { commit(table, table.newFastAppend().appendFile(DATA_FILE_BUCKET_0_IDS_0_2), branch); Expression rowFilter = Expressions.lessThan("iD", 5); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit(table, table.newDelete().deleteFromRowFilter(rowFilter), branch)) .isInstanceOf(ValidationException.class) .hasMessageStartingWith("Cannot find field 'iD'"); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit( table, @@ -319,7 +313,7 @@ public void testDeleteCaseSensitivity() { commit( table, table.newDelete().deleteFromRowFilter(rowFilter).caseSensitive(false), branch); - Assert.assertEquals("Should have 1 manifest", 1, deleteSnapshot.allManifests(FILE_IO).size()); + assertThat(deleteSnapshot.allManifests(FILE_IO)).hasSize(1); validateManifestEntries( deleteSnapshot.allManifests(FILE_IO).get(0), ids(deleteSnapshot.snapshotId()), @@ -327,7 +321,7 @@ public void testDeleteCaseSensitivity() { statuses(Status.DELETED)); } - @Test + @TestTemplate public 
void testDeleteFilesOnIndependentBranches() { String testBranch = "testBranch"; table.newAppend().appendFile(FILE_A).appendFile(FILE_B).appendFile(FILE_C).commit(); @@ -355,7 +349,7 @@ public void testDeleteFilesOnIndependentBranches() { statuses(Status.EXISTING, Status.DELETED, Status.DELETED)); } - @Test + @TestTemplate public void testDeleteWithCollision() { Schema schema = new Schema(Types.NestedField.of(0, false, "x", Types.StringType.get())); PartitionSpec spec = PartitionSpec.builderFor(schema).identity("x").build(); @@ -367,9 +361,8 @@ public void testDeleteWithCollision() { PartitionData partitionTwo = new PartitionData(spec.partitionType()); partitionTwo.set(0, "BB"); - Assert.assertEquals( - StructLikeWrapper.forType(spec.partitionType()).set(partitionOne).hashCode(), - StructLikeWrapper.forType(spec.partitionType()).set(partitionTwo).hashCode()); + assertThat(StructLikeWrapper.forType(spec.partitionType()).set(partitionTwo).hashCode()) + .isEqualTo(StructLikeWrapper.forType(spec.partitionType()).set(partitionOne).hashCode()); DataFile testFileOne = DataFiles.builder(spec) @@ -394,10 +387,7 @@ public void testDeleteWithCollision() { .map(s -> ((PartitionData) s.partition()).copy()) .collect(Collectors.toList()); - Assert.assertEquals( - "We should have both partitions", - ImmutableList.of(partitionOne, partitionTwo), - beforeDeletePartitions); + assertThat(beforeDeletePartitions).containsExactly(partitionOne, partitionTwo); collisionTable.newDelete().deleteFromRowFilter(Expressions.equal("x", "BB")).commit(); @@ -406,13 +396,10 @@ public void testDeleteWithCollision() { .map(s -> ((PartitionData) s.partition()).copy()) .collect(Collectors.toList()); - Assert.assertEquals( - "We should have deleted partitionTwo", - ImmutableList.of(partitionOne), - afterDeletePartitions); + assertThat(afterDeletePartitions).containsExactly(partitionOne); } - @Test + @TestTemplate public void testDeleteValidateFileExistence() { commit(table, 
table.newFastAppend().appendFile(FILE_B), branch); Snapshot delete = @@ -423,12 +410,12 @@ public void testDeleteValidateFileExistence() { files(FILE_B), statuses(Status.DELETED)); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit(table, table.newDelete().deleteFile(FILE_B).validateFilesExist(), branch)) .isInstanceOf(ValidationException.class); } - @Test + @TestTemplate public void testDeleteFilesNoValidation() { commit(table, table.newFastAppend().appendFile(FILE_B), branch); Snapshot delete1 = commit(table, table.newDelete().deleteFile(FILE_B), branch); @@ -439,8 +426,8 @@ public void testDeleteFilesNoValidation() { statuses(Status.DELETED)); Snapshot delete2 = commit(table, table.newDelete().deleteFile(FILE_B), branch); - Assertions.assertThat(delete2.allManifests(FILE_IO).isEmpty()).isTrue(); - Assertions.assertThat(delete2.removedDataFiles(FILE_IO).iterator().hasNext()).isFalse(); + assertThat(delete2.allManifests(FILE_IO)).isEmpty(); + assertThat(delete2.removedDataFiles(FILE_IO)).isEmpty(); } private static ByteBuffer longToBuffer(long value) { diff --git a/core/src/test/java/org/apache/iceberg/TestFastAppend.java b/core/src/test/java/org/apache/iceberg/TestFastAppend.java index a871b4e00c24..c3fc710ebffb 100644 --- a/core/src/test/java/org/apache/iceberg/TestFastAppend.java +++ b/core/src/test/java/org/apache/iceberg/TestFastAppend.java @@ -18,8 +18,12 @@ */ package org.apache.iceberg; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + import java.io.File; import java.io.IOException; +import java.util.Arrays; import java.util.List; import java.util.Set; import java.util.stream.Collectors; @@ -28,31 +32,23 @@ import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Sets; -import org.assertj.core.api.Assertions; -import 
org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class TestFastAppend extends TableTestBase { - @Parameterized.Parameters(name = "formatVersion = {0}") - public static Object[] parameters() { - return new Object[] {1, 2}; - } - - public TestFastAppend(int formatVersion) { - super(formatVersion); +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(ParameterizedTestExtension.class) +public class TestFastAppend extends TestBase { + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(1, 2); } - @Test + @TestTemplate public void testEmptyTableAppend() { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); TableMetadata base = readMetadata(); - Assert.assertNull("Should not have a current snapshot", base.currentSnapshot()); - Assert.assertEquals( - "Table should start with last-sequence-number 0", 0, base.lastSequenceNumber()); + assertThat(base.currentSnapshot()).isNull(); + assertThat(base.lastSequenceNumber()).isEqualTo(0); table.newFastAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); @@ -68,14 +64,13 @@ public void testEmptyTableAppend() { "Table should end with last-sequence-number 0", 0, base.lastSequenceNumber()); } - @Test + @TestTemplate public void testEmptyTableAppendManifest() throws IOException { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); TableMetadata base = readMetadata(); - Assert.assertNull("Should not have a current snapshot", base.currentSnapshot()); - Assert.assertEquals( - "Table should start with last-sequence-number 0", 0, base.lastSequenceNumber()); + assertThat(base.currentSnapshot()).isNull(); + assertThat(base.lastSequenceNumber()).isEqualTo(0); ManifestFile 
manifest = writeManifest(FILE_A, FILE_B); table.newFastAppend().appendManifest(manifest).commit(); @@ -86,16 +81,13 @@ public void testEmptyTableAppendManifest() throws IOException { ManifestFile committedManifest = Iterables.getOnlyElement(snap.allManifests(FILE_IO)); if (formatVersion == 1) { - Assertions.assertThat(committedManifest.path()).isNotEqualTo(manifest.path()); + assertThat(committedManifest.path()).isNotEqualTo(manifest.path()); } else { - Assertions.assertThat(committedManifest.path()).isEqualTo(manifest.path()); + assertThat(committedManifest.path()).isEqualTo(manifest.path()); } // validate that the metadata summary is correct when using appendManifest - Assert.assertEquals( - "Summary metadata should include 2 added files", - "2", - snap.summary().get("added-data-files")); + assertThat(snap.summary()).containsEntry("added-data-files", "2"); V2Assert.assertEquals("Snapshot sequence number should be 1", 1, snap.sequenceNumber()); V2Assert.assertEquals( @@ -105,14 +97,13 @@ public void testEmptyTableAppendManifest() throws IOException { "Table should end with last-sequence-number 0", 0, base.lastSequenceNumber()); } - @Test + @TestTemplate public void testEmptyTableAppendFilesAndManifest() throws IOException { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); TableMetadata base = readMetadata(); - Assert.assertNull("Should not have a current snapshot", base.currentSnapshot()); - Assert.assertEquals( - "Table should start with last-sequence-number 0", 0, base.lastSequenceNumber()); + assertThat(base.currentSnapshot()).isNull(); + assertThat(base.lastSequenceNumber()).isEqualTo(0); ManifestFile manifest = writeManifest(FILE_A, FILE_B); table.newFastAppend().appendFile(FILE_C).appendFile(FILE_D).appendManifest(manifest).commit(); @@ -135,9 +126,9 @@ public void testEmptyTableAppendFilesAndManifest() throws IOException { files(FILE_A, FILE_B)); if (formatVersion == 1) { - 
Assertions.assertThat(snap.allManifests(FILE_IO).get(1).path()).isNotEqualTo(manifest.path()); + assertThat(snap.allManifests(FILE_IO).get(1).path()).isNotEqualTo(manifest.path()); } else { - Assertions.assertThat(snap.allManifests(FILE_IO).get(1).path()).isEqualTo(manifest.path()); + assertThat(snap.allManifests(FILE_IO).get(1).path()).isEqualTo(manifest.path()); } V2Assert.assertEquals("Snapshot sequence number should be 1", 1, snap.sequenceNumber()); @@ -148,35 +139,32 @@ public void testEmptyTableAppendFilesAndManifest() throws IOException { "Table should end with last-sequence-number 0", 0, base.lastSequenceNumber()); } - @Test + @TestTemplate public void testNonEmptyTableAppend() { table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); TableMetadata base = readMetadata(); - Assert.assertNotNull("Should have a current snapshot", base.currentSnapshot()); + assertThat(base.currentSnapshot()).isNotNull(); List v2manifests = base.currentSnapshot().allManifests(FILE_IO); - Assert.assertEquals("Should have one existing manifest", 1, v2manifests.size()); + assertThat(v2manifests).hasSize(1); // prepare a new append Snapshot pending = table.newFastAppend().appendFile(FILE_C).appendFile(FILE_D).apply(); - Assert.assertNotEquals( - "Snapshots should have unique IDs", - base.currentSnapshot().snapshotId(), - pending.snapshotId()); + assertThat(pending.snapshotId()).isNotEqualTo(base.currentSnapshot().snapshotId()); validateSnapshot(base.currentSnapshot(), pending, FILE_C, FILE_D); } - @Test + @TestTemplate public void testNoMerge() { table.newAppend().appendFile(FILE_A).commit(); table.newFastAppend().appendFile(FILE_B).commit(); TableMetadata base = readMetadata(); - Assert.assertNotNull("Should have a current snapshot", base.currentSnapshot()); + assertThat(base.currentSnapshot()).isNotNull(); List v3manifests = base.currentSnapshot().allManifests(FILE_IO); - Assert.assertEquals("Should have 2 existing manifests", 2, v3manifests.size()); + 
assertThat(v3manifests).hasSize(2); // prepare a new append Snapshot pending = table.newFastAppend().appendFile(FILE_C).appendFile(FILE_D).apply(); @@ -186,12 +174,12 @@ public void testNoMerge() { ids.add(snapshot.snapshotId()); } ids.add(pending.snapshotId()); - Assert.assertEquals("Snapshots should have 3 unique IDs", 3, ids.size()); + assertThat(ids).hasSize(3); validateSnapshot(base.currentSnapshot(), pending, FILE_C, FILE_D); } - @Test + @TestTemplate public void testRefreshBeforeApply() { // load a new copy of the table that will not be refreshed by the commit Table stale = load(); @@ -199,9 +187,9 @@ public void testRefreshBeforeApply() { table.newAppend().appendFile(FILE_A).commit(); TableMetadata base = readMetadata(); - Assert.assertNotNull("Should have a current snapshot", base.currentSnapshot()); + assertThat(base.currentSnapshot()).isNotNull(); List v2manifests = base.currentSnapshot().allManifests(FILE_IO); - Assert.assertEquals("Should have 1 existing manifest", 1, v2manifests.size()); + assertThat(v2manifests).hasSize(1); // commit from the stale table AppendFiles append = stale.newFastAppend().appendFile(FILE_D); @@ -211,7 +199,7 @@ public void testRefreshBeforeApply() { validateSnapshot(base.currentSnapshot(), pending, FILE_D); } - @Test + @TestTemplate public void testRefreshBeforeCommit() { // commit from the stale table AppendFiles append = table.newFastAppend().appendFile(FILE_D); @@ -222,9 +210,9 @@ public void testRefreshBeforeCommit() { table.newAppend().appendFile(FILE_A).commit(); TableMetadata base = readMetadata(); - Assert.assertNotNull("Should have a current snapshot", base.currentSnapshot()); + assertThat(base.currentSnapshot()).isNotNull(); List v2manifests = base.currentSnapshot().allManifests(FILE_IO); - Assert.assertEquals("Should have 1 existing manifest", 1, v2manifests.size()); + assertThat(v2manifests).hasSize(1); append.commit(); @@ -236,13 +224,10 @@ public void testRefreshBeforeCommit() { List committedManifests = 
Lists.newArrayList(committed.currentSnapshot().allManifests(FILE_IO)); committedManifests.removeAll(base.currentSnapshot().allManifests(FILE_IO)); - Assert.assertEquals( - "Should reused manifest created by apply", - pending.allManifests(FILE_IO).get(0), - committedManifests.get(0)); + assertThat(committedManifests.get(0)).isEqualTo(pending.allManifests(FILE_IO).get(0)); } - @Test + @TestTemplate public void testFailure() { // inject 5 failures TestTables.TestTableOperations ops = table.ops(); @@ -251,16 +236,16 @@ public void testFailure() { AppendFiles append = table.newFastAppend().appendFile(FILE_B); Snapshot pending = append.apply(); ManifestFile newManifest = pending.allManifests(FILE_IO).get(0); - Assert.assertTrue("Should create new manifest", new File(newManifest.path()).exists()); + assertThat(new File(newManifest.path())).exists(); - Assertions.assertThatThrownBy(append::commit) + assertThatThrownBy(append::commit) .isInstanceOf(CommitFailedException.class) .hasMessage("Injected failure"); - Assert.assertFalse("Should clean up new manifest", new File(newManifest.path()).exists()); + assertThat(new File(newManifest.path())).doesNotExist(); } - @Test + @TestTemplate public void testAppendManifestCleanup() throws IOException { // inject 5 failures TestTables.TestTableOperations ops = table.ops(); @@ -270,25 +255,25 @@ public void testAppendManifestCleanup() throws IOException { AppendFiles append = table.newFastAppend().appendManifest(manifest); Snapshot pending = append.apply(); ManifestFile newManifest = pending.allManifests(FILE_IO).get(0); - Assert.assertTrue("Should create new manifest", new File(newManifest.path()).exists()); + assertThat(new File(newManifest.path())).exists(); if (formatVersion == 1) { - Assertions.assertThat(newManifest.path()).isNotEqualTo(manifest.path()); + assertThat(newManifest.path()).isNotEqualTo(manifest.path()); } else { - Assertions.assertThat(newManifest.path()).isEqualTo(manifest.path()); + 
assertThat(newManifest.path()).isEqualTo(manifest.path()); } - Assertions.assertThatThrownBy(append::commit) + assertThatThrownBy(append::commit) .isInstanceOf(CommitFailedException.class) .hasMessage("Injected failure"); if (formatVersion == 1) { - Assertions.assertThat(new File(newManifest.path())).doesNotExist(); + assertThat(new File(newManifest.path())).doesNotExist(); } else { - Assertions.assertThat(new File(newManifest.path())).exists(); + assertThat(new File(newManifest.path())).exists(); } } - @Test + @TestTemplate public void testRecoveryWithManifestList() { table.updateProperties().set(TableProperties.MANIFEST_LISTS_ENABLED, "true").commit(); @@ -299,20 +284,18 @@ public void testRecoveryWithManifestList() { AppendFiles append = table.newFastAppend().appendFile(FILE_B); Snapshot pending = append.apply(); ManifestFile newManifest = pending.allManifests(FILE_IO).get(0); - Assert.assertTrue("Should create new manifest", new File(newManifest.path()).exists()); + assertThat(new File(newManifest.path())).exists(); append.commit(); TableMetadata metadata = readMetadata(); validateSnapshot(null, metadata.currentSnapshot(), FILE_B); - Assert.assertTrue("Should commit same new manifest", new File(newManifest.path()).exists()); - Assert.assertTrue( - "Should commit the same new manifest", - metadata.currentSnapshot().allManifests(FILE_IO).contains(newManifest)); + assertThat(new File(newManifest.path())).exists(); + assertThat(metadata.currentSnapshot().allManifests(FILE_IO)).contains(newManifest); } - @Test + @TestTemplate public void testRecoveryWithoutManifestList() { table.updateProperties().set(TableProperties.MANIFEST_LISTS_ENABLED, "false").commit(); @@ -323,27 +306,25 @@ public void testRecoveryWithoutManifestList() { AppendFiles append = table.newFastAppend().appendFile(FILE_B); Snapshot pending = append.apply(); ManifestFile newManifest = pending.allManifests(FILE_IO).get(0); - Assert.assertTrue("Should create new manifest", new 
File(newManifest.path()).exists()); + assertThat(new File(newManifest.path())).exists(); append.commit(); TableMetadata metadata = readMetadata(); validateSnapshot(null, metadata.currentSnapshot(), FILE_B); - Assert.assertTrue("Should commit same new manifest", new File(newManifest.path()).exists()); - Assert.assertTrue( - "Should commit the same new manifest", - metadata.currentSnapshot().allManifests(FILE_IO).contains(newManifest)); + assertThat(new File(newManifest.path())).exists(); + assertThat(metadata.currentSnapshot().allManifests(FILE_IO)).contains(newManifest); } - @Test + @TestTemplate public void testAppendManifestWithSnapshotIdInheritance() throws IOException { table.updateProperties().set(TableProperties.SNAPSHOT_ID_INHERITANCE_ENABLED, "true").commit(); - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); TableMetadata base = readMetadata(); - Assert.assertNull("Should not have a current snapshot", base.currentSnapshot()); + assertThat(base.currentSnapshot()).isNull(); ManifestFile manifest = writeManifest(FILE_A, FILE_B); table.newFastAppend().appendManifest(manifest).commit(); @@ -351,7 +332,7 @@ public void testAppendManifestWithSnapshotIdInheritance() throws IOException { Snapshot snapshot = table.currentSnapshot(); List manifests = table.currentSnapshot().allManifests(FILE_IO); ManifestFile committedManifest = Iterables.getOnlyElement(manifests); - Assertions.assertThat(committedManifest.path()).isEqualTo(manifest.path()); + assertThat(committedManifest.path()).isEqualTo(manifest.path()); validateManifestEntries( manifests.get(0), @@ -360,32 +341,21 @@ public void testAppendManifestWithSnapshotIdInheritance() throws IOException { statuses(Status.ADDED, Status.ADDED)); // validate that the metadata summary is correct when using appendManifest - Assert.assertEquals( - "Summary metadata should include 2 added files", - "2", - snapshot.summary().get("added-data-files")); - 
Assert.assertEquals( - "Summary metadata should include 2 added records", - "2", - snapshot.summary().get("added-records")); - Assert.assertEquals( - "Summary metadata should include 2 files in total", - "2", - snapshot.summary().get("total-data-files")); - Assert.assertEquals( - "Summary metadata should include 2 records in total", - "2", - snapshot.summary().get("total-records")); + assertThat(snapshot.summary()) + .containsEntry("added-data-files", "2") + .containsEntry("added-records", "2") + .containsEntry("total-data-files", "2") + .containsEntry("total-records", "2"); } - @Test + @TestTemplate public void testAppendManifestFailureWithSnapshotIdInheritance() throws IOException { table.updateProperties().set(TableProperties.SNAPSHOT_ID_INHERITANCE_ENABLED, "true").commit(); - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); TableMetadata base = readMetadata(); - Assert.assertNull("Should not have a current snapshot", base.currentSnapshot()); + assertThat(base.currentSnapshot()).isNull(); table.updateProperties().set(TableProperties.COMMIT_NUM_RETRIES, "1").commit(); @@ -396,36 +366,36 @@ public void testAppendManifestFailureWithSnapshotIdInheritance() throws IOExcept AppendFiles append = table.newAppend(); append.appendManifest(manifest); - Assertions.assertThatThrownBy(append::commit) + assertThatThrownBy(append::commit) .isInstanceOf(CommitFailedException.class) .hasMessage("Injected failure"); - Assert.assertTrue("Append manifest should not be deleted", new File(manifest.path()).exists()); + assertThat(new File(manifest.path())).exists(); } - @Test + @TestTemplate public void testInvalidAppendManifest() throws IOException { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); TableMetadata base = readMetadata(); - Assert.assertNull("Should not have a current snapshot", base.currentSnapshot()); + 
assertThat(base.currentSnapshot()).isNull(); ManifestFile manifestWithExistingFiles = writeManifest("manifest-file-1.avro", manifestEntry(Status.EXISTING, null, FILE_A)); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> table.newFastAppend().appendManifest(manifestWithExistingFiles).commit()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot append manifest with existing files"); ManifestFile manifestWithDeletedFiles = writeManifest("manifest-file-2.avro", manifestEntry(Status.DELETED, null, FILE_A)); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> table.newFastAppend().appendManifest(manifestWithDeletedFiles).commit()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot append manifest with deleted files"); } - @Test + @TestTemplate public void testPartitionSummariesOnUnpartitionedTable() { Table table = TestTables.create( @@ -447,7 +417,7 @@ public void testPartitionSummariesOnUnpartitionedTable() { .build()) .commit(); - Assertions.assertThat( + assertThat( table.currentSnapshot().summary().keySet().stream() .filter(key -> key.startsWith(SnapshotSummary.CHANGED_PARTITION_PREFIX)) .collect(Collectors.toSet())) @@ -455,7 +425,7 @@ public void testPartitionSummariesOnUnpartitionedTable() { .isEmpty(); } - @Test + @TestTemplate public void testDefaultPartitionSummaries() { table.newFastAppend().appendFile(FILE_A).commit(); @@ -463,23 +433,14 @@ public void testDefaultPartitionSummaries() { table.currentSnapshot().summary().keySet().stream() .filter(key -> key.startsWith(SnapshotSummary.CHANGED_PARTITION_PREFIX)) .collect(Collectors.toSet()); - Assert.assertEquals( - "Should include no partition summaries by default", 0, partitionSummaryKeys.size()); - - String summariesIncluded = - table - .currentSnapshot() - .summary() - .getOrDefault(SnapshotSummary.PARTITION_SUMMARY_PROP, "false"); - Assert.assertEquals( - "Should not set partition-summaries-included to true", "false", summariesIncluded); - - String 
changedPartitions = - table.currentSnapshot().summary().get(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP); - Assert.assertEquals("Should set changed partition count", "1", changedPartitions); + assertThat(partitionSummaryKeys).isEmpty(); + + assertThat(table.currentSnapshot().summary()) + .doesNotContainKey(SnapshotSummary.PARTITION_SUMMARY_PROP) + .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "1"); } - @Test + @TestTemplate public void testIncludedPartitionSummaries() { table.updateProperties().set(TableProperties.WRITE_PARTITION_SUMMARY_LIMIT, "1").commit(); @@ -489,32 +450,17 @@ public void testIncludedPartitionSummaries() { table.currentSnapshot().summary().keySet().stream() .filter(key -> key.startsWith(SnapshotSummary.CHANGED_PARTITION_PREFIX)) .collect(Collectors.toSet()); - Assert.assertEquals("Should include a partition summary", 1, partitionSummaryKeys.size()); - - String summariesIncluded = - table - .currentSnapshot() - .summary() - .getOrDefault(SnapshotSummary.PARTITION_SUMMARY_PROP, "false"); - Assert.assertEquals( - "Should set partition-summaries-included to true", "true", summariesIncluded); - - String changedPartitions = - table.currentSnapshot().summary().get(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP); - Assert.assertEquals("Should set changed partition count", "1", changedPartitions); - - String partitionSummary = - table - .currentSnapshot() - .summary() - .get(SnapshotSummary.CHANGED_PARTITION_PREFIX + "data_bucket=0"); - Assert.assertEquals( - "Summary should include 1 file with 1 record that is 10 bytes", - "added-data-files=1,added-records=1,added-files-size=10", - partitionSummary); + assertThat(partitionSummaryKeys).hasSize(1); + + assertThat(table.currentSnapshot().summary()) + .containsEntry(SnapshotSummary.PARTITION_SUMMARY_PROP, "true") + .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "1") + .containsEntry( + SnapshotSummary.CHANGED_PARTITION_PREFIX + "data_bucket=0", + 
"added-data-files=1,added-records=1,added-files-size=10"); } - @Test + @TestTemplate public void testIncludedPartitionSummaryLimit() { table.updateProperties().set(TableProperties.WRITE_PARTITION_SUMMARY_LIMIT, "1").commit(); @@ -524,69 +470,56 @@ public void testIncludedPartitionSummaryLimit() { table.currentSnapshot().summary().keySet().stream() .filter(key -> key.startsWith(SnapshotSummary.CHANGED_PARTITION_PREFIX)) .collect(Collectors.toSet()); - Assert.assertEquals( - "Should include no partition summaries, over limit", 0, partitionSummaryKeys.size()); - - String summariesIncluded = - table - .currentSnapshot() - .summary() - .getOrDefault(SnapshotSummary.PARTITION_SUMMARY_PROP, "false"); - Assert.assertEquals( - "Should not set partition-summaries-included to true", "false", summariesIncluded); - - String changedPartitions = - table.currentSnapshot().summary().get(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP); - Assert.assertEquals("Should set changed partition count", "2", changedPartitions); + assertThat(partitionSummaryKeys).isEmpty(); + + assertThat(table.currentSnapshot().summary()) + .doesNotContainKey(SnapshotSummary.PARTITION_SUMMARY_PROP) + .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "2"); } - @Test + @TestTemplate public void testAppendToExistingBranch() { table.newFastAppend().appendFile(FILE_A).commit(); table.manageSnapshots().createBranch("branch", table.currentSnapshot().snapshotId()).commit(); table.newFastAppend().appendFile(FILE_B).toBranch("branch").commit(); - int branchSnapshot = 2; - Assert.assertEquals(table.currentSnapshot().snapshotId(), 1); - Assert.assertEquals(table.ops().current().ref("branch").snapshotId(), branchSnapshot); + assertThat(table.currentSnapshot().snapshotId()).isEqualTo(1); + assertThat(table.ops().current().ref("branch").snapshotId()).isEqualTo(2); } - @Test + @TestTemplate public void testAppendCreatesBranchIfNeeded() { table.newFastAppend().appendFile(FILE_A).commit(); 
table.newFastAppend().appendFile(FILE_B).toBranch("branch").commit(); - int branchSnapshot = 2; - Assert.assertEquals(table.currentSnapshot().snapshotId(), 1); - Assert.assertNotNull(table.ops().current().ref("branch")); - Assert.assertEquals(table.ops().current().ref("branch").snapshotId(), branchSnapshot); + assertThat(table.currentSnapshot().snapshotId()).isEqualTo(1); + assertThat(table.ops().current().ref("branch")).isNotNull(); + assertThat(table.ops().current().ref("branch").snapshotId()).isEqualTo(2); } - @Test + @TestTemplate public void testAppendToBranchEmptyTable() { table.newFastAppend().appendFile(FILE_B).toBranch("branch").commit(); - int branchSnapshot = 1; - Assert.assertNull(table.currentSnapshot()); - Assert.assertNotNull(table.ops().current().ref("branch")); - Assert.assertEquals(table.ops().current().ref("branch").snapshotId(), branchSnapshot); + assertThat(table.currentSnapshot()).isNull(); + assertThat(table.ops().current().ref("branch")).isNotNull(); + assertThat(table.ops().current().ref("branch").snapshotId()).isEqualTo(1); } - @Test + @TestTemplate public void testAppendToNullBranchFails() { - Assertions.assertThatThrownBy(() -> table.newFastAppend().appendFile(FILE_A).toBranch(null)) + assertThatThrownBy(() -> table.newFastAppend().appendFile(FILE_A).toBranch(null)) .as("Invalid branch") .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid branch name: null"); } - @Test + @TestTemplate public void testAppendToTagFails() { table.newFastAppend().appendFile(FILE_A).commit(); table.manageSnapshots().createTag("some-tag", table.currentSnapshot().snapshotId()).commit(); - Assertions.assertThatThrownBy( - () -> table.newFastAppend().appendFile(FILE_A).toBranch("some-tag").commit()) + assertThatThrownBy(() -> table.newFastAppend().appendFile(FILE_A).toBranch("some-tag").commit()) .as("Invalid branch") .isInstanceOf(IllegalArgumentException.class) .hasMessage( diff --git a/core/src/test/java/org/apache/iceberg/TestMergeAppend.java 
b/core/src/test/java/org/apache/iceberg/TestMergeAppend.java index 39c9ac4b6c21..17d6bd5a19ba 100644 --- a/core/src/test/java/org/apache/iceberg/TestMergeAppend.java +++ b/core/src/test/java/org/apache/iceberg/TestMergeAppend.java @@ -20,9 +20,12 @@ import static org.apache.iceberg.relocated.com.google.common.collect.Iterators.concat; import static org.apache.iceberg.util.SnapshotUtil.latestSnapshot; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.File; import java.io.IOException; +import java.util.Arrays; import java.util.List; import java.util.Set; import java.util.concurrent.Executors; @@ -31,55 +34,43 @@ import org.apache.iceberg.ManifestEntry.Status; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.types.Types; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class TestMergeAppend extends TableTestBase { - private final String branch; - - @Parameterized.Parameters(name = "formatVersion = {0}, branch = {1}") - public static Object[] parameters() { - return new Object[][] { - new Object[] {1, "main"}, - new Object[] {1, "testBranch"}, - new Object[] {2, "main"}, - new Object[] {2, "testBranch"} - }; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(ParameterizedTestExtension.class) +public class TestMergeAppend extends TestBase { + @Parameter(index = 1) + private String branch; + + @Parameters(name = "formatVersion = {0}, branch = {1}") + protected static List parameters() { + return Arrays.asList( + new Object[] 
{1, "main"}, + new Object[] {1, "testBranch"}, + new Object[] {2, "main"}, + new Object[] {2, "testBranch"}); } - public TestMergeAppend(int formatVersion, String branch) { - super(formatVersion); - this.branch = branch; - } - - @Test + @TestTemplate public void testEmptyTableAppend() { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); TableMetadata base = readMetadata(); - Assert.assertNull("Should not have a current snapshot", base.currentSnapshot()); - Assert.assertEquals("Last sequence number should be 0", 0, base.lastSequenceNumber()); + assertThat(base.currentSnapshot()).isNull(); + assertThat(base.lastSequenceNumber()).isEqualTo(0); Snapshot committedSnapshot = commit(table, table.newAppend().appendFile(FILE_A).appendFile(FILE_B), branch); - Assert.assertNotNull("Should create a snapshot", committedSnapshot); + assertThat(committedSnapshot).isNotNull(); V1Assert.assertEquals( "Last sequence number should be 0", 0, table.ops().current().lastSequenceNumber()); V2Assert.assertEquals( "Last sequence number should be 1", 1, table.ops().current().lastSequenceNumber()); - Assert.assertEquals( - "Should create 1 manifest for initial write", - 1, - committedSnapshot.allManifests(table.io()).size()); + assertThat(committedSnapshot.allManifests(table.io())).hasSize(1); long snapshotId = committedSnapshot.snapshotId(); @@ -92,18 +83,18 @@ public void testEmptyTableAppend() { statuses(Status.ADDED, Status.ADDED)); } - @Test + @TestTemplate public void testEmptyTableAppendManifest() throws IOException { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); TableMetadata base = readMetadata(); - Assert.assertNull("Should not have a current snapshot", base.currentSnapshot()); - Assert.assertEquals("Last sequence number should be 0", 0, base.lastSequenceNumber()); + assertThat(base.currentSnapshot()).isNull(); + 
assertThat(base.lastSequenceNumber()).isEqualTo(0); ManifestFile manifest = writeManifest(FILE_A, FILE_B); Snapshot committedSnapshot = commit(table, table.newAppend().appendManifest(manifest), branch); - Assert.assertNotNull("Should create a snapshot", committedSnapshot); + assertThat(committedSnapshot).isNotNull(); V1Assert.assertEquals( "Last sequence number should be 0", 0, table.ops().current().lastSequenceNumber()); V2Assert.assertEquals( @@ -111,9 +102,9 @@ public void testEmptyTableAppendManifest() throws IOException { List manifests = committedSnapshot.allManifests(table.io()); ManifestFile committedManifest = Iterables.getOnlyElement(manifests); if (formatVersion == 1) { - Assertions.assertThat(committedManifest.path()).isNotEqualTo(manifest.path()); + assertThat(committedManifest.path()).isNotEqualTo(manifest.path()); } else { - Assertions.assertThat(committedManifest.path()).isEqualTo(manifest.path()); + assertThat(committedManifest.path()).isEqualTo(manifest.path()); } long snapshotId = committedSnapshot.snapshotId(); @@ -126,19 +117,16 @@ public void testEmptyTableAppendManifest() throws IOException { statuses(Status.ADDED, Status.ADDED)); // validate that the metadata summary is correct when using appendManifest - Assert.assertEquals( - "Summary metadata should include 2 added files", - "2", - committedSnapshot.summary().get("added-data-files")); + assertThat(committedSnapshot.summary()).containsEntry("added-data-files", "2"); } - @Test + @TestTemplate public void testEmptyTableAppendFilesAndManifest() throws IOException { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); TableMetadata base = readMetadata(); - Assert.assertNull("Should not have a current snapshot", base.currentSnapshot()); - Assert.assertEquals("Last sequence number should be 0", 0, base.lastSequenceNumber()); + assertThat(base.currentSnapshot()).isNull(); + 
assertThat(base.lastSequenceNumber()).isEqualTo(0); ManifestFile manifest = writeManifest(FILE_A, FILE_B); Snapshot committedSnapshot = @@ -147,15 +135,12 @@ public void testEmptyTableAppendFilesAndManifest() throws IOException { table.newAppend().appendFile(FILE_C).appendFile(FILE_D).appendManifest(manifest), branch); - Assert.assertNotNull("Should create a snapshot", committedSnapshot); + assertThat(committedSnapshot).isNotNull(); V1Assert.assertEquals( "Last sequence number should be 0", 0, table.ops().current().lastSequenceNumber()); V2Assert.assertEquals( "Last sequence number should be 1", 1, table.ops().current().lastSequenceNumber()); - Assert.assertEquals( - "Should create 2 manifests for initial write", - 2, - committedSnapshot.allManifests(table.io()).size()); + assertThat(committedSnapshot.allManifests(table.io())).hasSize(2); long snapshotId = committedSnapshot.snapshotId(); @@ -163,9 +148,9 @@ public void testEmptyTableAppendFilesAndManifest() throws IOException { ManifestFile committedManifest2 = committedSnapshot.allManifests(table.io()).get(1); if (formatVersion == 1) { - Assertions.assertThat(committedManifest2.path()).isNotEqualTo(manifest.path()); + assertThat(committedManifest2.path()).isNotEqualTo(manifest.path()); } else { - Assertions.assertThat(committedManifest2.path()).isEqualTo(manifest.path()); + assertThat(committedManifest2.path()).isEqualTo(manifest.path()); } validateManifest( @@ -185,13 +170,13 @@ public void testEmptyTableAppendFilesAndManifest() throws IOException { statuses(Status.ADDED, Status.ADDED)); } - @Test + @TestTemplate public void testAppendWithManifestScanExecutor() { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); TableMetadata base = readMetadata(); - Assert.assertNull("Should not have a current snapshot", base.currentSnapshot()); - Assert.assertEquals("Last sequence number should be 0", 0, base.lastSequenceNumber()); + 
assertThat(base.currentSnapshot()).isNull(); + assertThat(base.lastSequenceNumber()).isEqualTo(0); AtomicInteger scanThreadsIndex = new AtomicInteger(0); Snapshot snapshot = commit( @@ -212,20 +197,22 @@ public void testAppendWithManifestScanExecutor() { return thread; })), branch); - Assert.assertTrue("Thread should be created in provided pool", scanThreadsIndex.get() > 0); - Assert.assertNotNull("Should create a snapshot", snapshot); + assertThat(scanThreadsIndex.get()) + .as("Thread should be created in provided pool") + .isGreaterThan(0); + assertThat(snapshot).isNotNull(); } - @Test + @TestTemplate public void testMergeWithAppendFilesAndManifest() throws IOException { // merge all manifests for this test table.updateProperties().set("commit.manifest.min-count-to-merge", "1").commit(); - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); TableMetadata base = readMetadata(); - Assert.assertNull("Should not have a current snapshot", base.currentSnapshot()); - Assert.assertEquals("Last sequence number should be 0", 0, base.lastSequenceNumber()); + assertThat(base.currentSnapshot()).isNull(); + assertThat(base.lastSequenceNumber()).isEqualTo(0); ManifestFile manifest = writeManifest(FILE_A, FILE_B); Snapshot committedSnapshot = @@ -234,7 +221,7 @@ public void testMergeWithAppendFilesAndManifest() throws IOException { table.newAppend().appendFile(FILE_C).appendFile(FILE_D).appendManifest(manifest), branch); - Assert.assertNotNull("Should create a snapshot", committedSnapshot); + assertThat(committedSnapshot).isNotNull(); V1Assert.assertEquals( "Last sequence number should be 0", 0, table.ops().current().lastSequenceNumber()); V2Assert.assertEquals( @@ -245,7 +232,7 @@ public void testMergeWithAppendFilesAndManifest() throws IOException { List manifests = committedSnapshot.allManifests(table.io()); ManifestFile committedManifest = Iterables.getOnlyElement(manifests); - 
Assertions.assertThat(committedManifest.path()).isNotEqualTo(manifest.path()); + assertThat(committedManifest.path()).isNotEqualTo(manifest.path()); validateManifest( committedManifest, @@ -256,17 +243,17 @@ public void testMergeWithAppendFilesAndManifest() throws IOException { statuses(Status.ADDED, Status.ADDED, Status.ADDED, Status.ADDED)); } - @Test + @TestTemplate public void testMergeWithExistingManifest() { // merge all manifests for this test table.updateProperties().set("commit.manifest.min-count-to-merge", "1").commit(); - Assert.assertEquals("Last sequence number should be 0", 0, readMetadata().lastSequenceNumber()); - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); + assertThat(readMetadata().lastSequenceNumber()).isEqualTo(0); Snapshot commitBefore = commit(table, table.newAppend().appendFile(FILE_A).appendFile(FILE_B), branch); - Assert.assertNotNull("Should create a snapshot", commitBefore); + assertThat(commitBefore).isNotNull(); V1Assert.assertEquals( "Last sequence number should be 0", 0, table.ops().current().lastSequenceNumber()); V2Assert.assertEquals( @@ -276,10 +263,7 @@ public void testMergeWithExistingManifest() { long baseId = commitBefore.snapshotId(); validateSnapshot(null, commitBefore, 1, FILE_A, FILE_B); - Assert.assertEquals( - "Should create 1 manifest for initial write", - 1, - commitBefore.allManifests(table.io()).size()); + assertThat(commitBefore.allManifests(table.io())).hasSize(1); ManifestFile initialManifest = commitBefore.allManifests(table.io()).get(0); validateManifest( initialManifest, @@ -296,13 +280,9 @@ public void testMergeWithExistingManifest() { V2Assert.assertEquals( "Last sequence number should be 2", 2, table.ops().current().lastSequenceNumber()); - Assert.assertEquals( - "Should contain 1 merged manifest for second write", - 1, - committedAfter.allManifests(table.io()).size()); + 
assertThat(committedAfter.allManifests(table.io())).hasSize(1); ManifestFile newManifest = committedAfter.allManifests(table.io()).get(0); - Assert.assertNotEquals( - "Should not contain manifest from initial write", initialManifest, newManifest); + assertThat(newManifest).isNotEqualTo(initialManifest); long snapshotId = committedAfter.snapshotId(); @@ -315,9 +295,9 @@ public void testMergeWithExistingManifest() { statuses(Status.ADDED, Status.ADDED, Status.EXISTING, Status.EXISTING)); } - @Test + @TestTemplate public void testManifestMergeMinCount() throws IOException { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); table .updateProperties() .set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "2") @@ -328,8 +308,8 @@ public void testManifestMergeMinCount() throws IOException { .commit(); TableMetadata base = readMetadata(); - Assert.assertNull("Should not have a current snapshot", base.currentSnapshot()); - Assert.assertEquals("Last sequence number should be 0", 0, base.lastSequenceNumber()); + assertThat(base.currentSnapshot()).isNull(); + assertThat(base.lastSequenceNumber()).isEqualTo(0); ManifestFile manifest = writeManifestWithName("FILE_A", FILE_A); ManifestFile manifest2 = writeManifestWithName("FILE_C", FILE_C); @@ -351,10 +331,7 @@ public void testManifestMergeMinCount() throws IOException { V1Assert.assertEquals( "Table should end with last-sequence-number 0", 0, base.lastSequenceNumber()); - Assert.assertEquals( - "Should contain 2 merged manifest for first write", - 2, - snap1.allManifests(table.io()).size()); + assertThat(snap1.allManifests(table.io())).hasSize(2); validateManifest( snap1.allManifests(table.io()).get(0), dataSeqs(1L), @@ -391,10 +368,7 @@ public void testManifestMergeMinCount() throws IOException { V1Assert.assertEquals( "Table should end with last-sequence-number 0", 0, base.lastSequenceNumber()); - Assert.assertEquals( - "Should contain 3 merged manifest for 
second write", - 3, - snap2.allManifests(table.io()).size()); + assertThat(snap2.allManifests(table.io())).hasSize(3); validateManifest( snap2.allManifests(table.io()).get(0), dataSeqs(2L), @@ -418,15 +392,12 @@ public void testManifestMergeMinCount() throws IOException { statuses(Status.EXISTING, Status.EXISTING, Status.EXISTING)); // validate that the metadata summary is correct when using appendManifest - Assert.assertEquals( - "Summary metadata should include 3 added files", - "3", - snap2.summary().get("added-data-files")); + assertThat(snap2.summary()).containsEntry("added-data-files", "3"); } - @Test + @TestTemplate public void testManifestsMergeIntoOne() throws IOException { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); Snapshot snap1 = commit(table, table.newAppend().appendFile(FILE_A), branch); TableMetadata base = readMetadata(); V2Assert.assertEquals("Snapshot sequence number should be 1", 1, snap1.sequenceNumber()); @@ -435,7 +406,7 @@ public void testManifestsMergeIntoOne() throws IOException { "Table should end with last-sequence-number 0", 0, base.lastSequenceNumber()); long commitId1 = snap1.snapshotId(); - Assert.assertEquals("Should contain 1 manifest", 1, snap1.allManifests(table.io()).size()); + assertThat(snap1.allManifests(table.io())).hasSize(1); validateManifest( snap1.allManifests(table.io()).get(0), dataSeqs(1L), @@ -452,7 +423,7 @@ public void testManifestsMergeIntoOne() throws IOException { V1Assert.assertEquals( "Table should end with last-sequence-number 0", 0, base.lastSequenceNumber()); - Assert.assertEquals("Should contain 2 manifests", 2, snap2.allManifests(table.io()).size()); + assertThat(snap2.allManifests(table.io())).hasSize(2); validateManifest( snap2.allManifests(table.io()).get(0), dataSeqs(2L), @@ -484,7 +455,7 @@ public void testManifestsMergeIntoOne() throws IOException { V1Assert.assertEquals( "Table should end with last-sequence-number 
0", 0, base.lastSequenceNumber()); - Assert.assertEquals("Should contain 3 manifests", 3, snap3.allManifests(table.io()).size()); + assertThat(snap3.allManifests(table.io())).hasSize(3); long commitId3 = snap3.snapshotId(); validateManifest( snap3.allManifests(table.io()).get(0), @@ -527,8 +498,7 @@ public void testManifestsMergeIntoOne() throws IOException { "Table should end with last-sequence-number 0", 0, base.lastSequenceNumber()); long commitId4 = snap4.snapshotId(); - Assert.assertEquals( - "Should only contains 1 merged manifest", 1, snap4.allManifests(table.io()).size()); + assertThat(snap4.allManifests(table.io())).hasSize(1); validateManifest( snap4.allManifests(table.io()).get(0), dataSeqs(4L, 3L, 2L, 1L), @@ -538,14 +508,14 @@ public void testManifestsMergeIntoOne() throws IOException { statuses(Status.ADDED, Status.EXISTING, Status.EXISTING, Status.EXISTING)); } - @Test + @TestTemplate public void testManifestDoNotMergeMinCount() throws IOException { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); table.updateProperties().set("commit.manifest.min-count-to-merge", "4").commit(); TableMetadata base = readMetadata(); - Assert.assertNull("Should not have a current snapshot", base.currentSnapshot()); - Assert.assertEquals("Last sequence number should be 0", 0, base.lastSequenceNumber()); + assertThat(base.currentSnapshot()).isNull(); + assertThat(base.lastSequenceNumber()).isEqualTo(0); ManifestFile manifest = writeManifest(FILE_A, FILE_B); ManifestFile manifest2 = writeManifestWithName("FILE_C", FILE_C); @@ -560,14 +530,14 @@ public void testManifestDoNotMergeMinCount() throws IOException { .appendManifest(manifest3), branch); - Assert.assertNotNull("Should create a snapshot", committed); + assertThat(committed).isNotNull(); V1Assert.assertEquals( "Last sequence number should be 0", 0, table.ops().current().lastSequenceNumber()); V2Assert.assertEquals( "Last sequence number 
should be 1", 1, table.ops().current().lastSequenceNumber()); List manifests = committed.allManifests(table.io()); - Assertions.assertThat(manifests).hasSize(3); + assertThat(manifests).hasSize(3); ManifestFile committedManifest = manifests.get(0); ManifestFile committedManifest2 = manifests.get(1); @@ -576,13 +546,13 @@ public void testManifestDoNotMergeMinCount() throws IOException { long snapshotId = committed.snapshotId(); if (formatVersion == 1) { - Assertions.assertThat(committedManifest.path()).isNotEqualTo(manifest.path()); - Assertions.assertThat(committedManifest2.path()).isNotEqualTo(manifest2.path()); - Assertions.assertThat(committedManifest3.path()).isNotEqualTo(manifest3.path()); + assertThat(committedManifest.path()).isNotEqualTo(manifest.path()); + assertThat(committedManifest2.path()).isNotEqualTo(manifest2.path()); + assertThat(committedManifest3.path()).isNotEqualTo(manifest3.path()); } else { - Assertions.assertThat(committedManifest.path()).isEqualTo(manifest.path()); - Assertions.assertThat(committedManifest2.path()).isEqualTo(manifest2.path()); - Assertions.assertThat(committedManifest3.path()).isEqualTo(manifest3.path()); + assertThat(committedManifest.path()).isEqualTo(manifest.path()); + assertThat(committedManifest2.path()).isEqualTo(manifest2.path()); + assertThat(committedManifest3.path()).isEqualTo(manifest3.path()); } validateManifest( @@ -608,19 +578,16 @@ public void testManifestDoNotMergeMinCount() throws IOException { statuses(Status.ADDED)); // validate that the metadata summary is correct when using appendManifest - Assert.assertEquals( - "Summary metadata should include 4 added files", - "4", - committed.summary().get("added-data-files")); + assertThat(committed.summary()).containsEntry("added-data-files", "4"); } - @Test + @TestTemplate public void testMergeWithExistingManifestAfterDelete() { // merge all manifests for this test table.updateProperties().set("commit.manifest.min-count-to-merge", "1").commit(); - 
Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); - Assert.assertEquals("Last sequence number should be 0", 0, readMetadata().lastSequenceNumber()); + assertThat(listManifestFiles()).isEmpty(); + assertThat(readMetadata().lastSequenceNumber()).isEqualTo(0); Snapshot snap = commit(table, table.newAppend().appendFile(FILE_A).appendFile(FILE_B), branch); @@ -628,8 +595,7 @@ public void testMergeWithExistingManifestAfterDelete() { TableMetadata base = readMetadata(); long baseId = snap.snapshotId(); - Assert.assertEquals( - "Should create 1 manifest for initial write", 1, snap.allManifests(table.io()).size()); + assertThat(snap.allManifests(table.io())).hasSize(1); ManifestFile initialManifest = snap.allManifests(table.io()).get(0); validateManifest( initialManifest, @@ -650,10 +616,7 @@ public void testMergeWithExistingManifestAfterDelete() { TableMetadata delete = readMetadata(); long deleteId = latestSnapshot(table, branch).snapshotId(); - Assert.assertEquals( - "Should create 1 filtered manifest for delete", - 1, - latestSnapshot(table, branch).allManifests(table.io()).size()); + assertThat(latestSnapshot(table, branch).allManifests(table.io())).hasSize(1); ManifestFile deleteManifest = deleteSnapshot.allManifests(table.io()).get(0); validateManifest( @@ -674,13 +637,9 @@ public void testMergeWithExistingManifestAfterDelete() { V1Assert.assertEquals( "Table should end with last-sequence-number 0", 0, readMetadata().lastSequenceNumber()); - Assert.assertEquals( - "Should contain 1 merged manifest for second write", - 1, - committedSnapshot.allManifests(table.io()).size()); + assertThat(committedSnapshot.allManifests(table.io())).hasSize(1); ManifestFile newManifest = committedSnapshot.allManifests(table.io()).get(0); - Assert.assertNotEquals( - "Should not contain manifest from initial write", initialManifest, newManifest); + assertThat(newManifest).isNotEqualTo(initialManifest); long snapshotId = committedSnapshot.snapshotId(); @@ 
-692,13 +651,13 @@ public void testMergeWithExistingManifestAfterDelete() { statuses(Status.ADDED, Status.ADDED, Status.EXISTING)); } - @Test + @TestTemplate public void testMinMergeCount() { // only merge when there are at least 4 manifests table.updateProperties().set("commit.manifest.min-count-to-merge", "4").commit(); - Assert.assertEquals("Last sequence number should be 0", 0, readMetadata().lastSequenceNumber()); - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); + assertThat(readMetadata().lastSequenceNumber()).isEqualTo(0); Snapshot snap1 = commit(table, table.newFastAppend().appendFile(FILE_A), branch); long idFileA = snap1.snapshotId(); @@ -708,18 +667,14 @@ public void testMinMergeCount() { long idFileB = snap2.snapshotId(); validateSnapshot(snap1, snap2, 2, FILE_B); - Assert.assertEquals( - "Should have 2 manifests from setup writes", 2, snap2.allManifests(table.io()).size()); + assertThat(snap2.allManifests(table.io())).hasSize(2); Snapshot snap3 = commit(table, table.newAppend().appendFile(FILE_C), branch); long idFileC = snap3.snapshotId(); validateSnapshot(snap2, snap3, 3, FILE_C); TableMetadata base = readMetadata(); - Assert.assertEquals( - "Should have 3 unmerged manifests", - 3, - latestSnapshot(table, branch).allManifests(table.io()).size()); + assertThat(latestSnapshot(table, branch).allManifests(table.io())).hasSize(3); Set unmerged = Sets.newHashSet(latestSnapshot(table, branch).allManifests(table.io())); @@ -730,12 +685,10 @@ public void testMinMergeCount() { V1Assert.assertEquals( "Table should end with last-sequence-number 0", 0, readMetadata().lastSequenceNumber()); - Assert.assertEquals( - "Should contain 1 merged manifest after the 4th write", - 1, - committed.allManifests(table.io()).size()); + assertThat(committed.allManifests(table.io())).hasSize(1); + ManifestFile newManifest = committed.allManifests(table.io()).get(0); - Assert.assertFalse("Should not 
contain previous manifests", unmerged.contains(newManifest)); + assertThat(unmerged).doesNotContain(newManifest); long lastSnapshotId = committed.snapshotId(); @@ -748,13 +701,13 @@ public void testMinMergeCount() { statuses(Status.ADDED, Status.EXISTING, Status.EXISTING, Status.EXISTING)); } - @Test + @TestTemplate public void testMergeSizeTargetWithExistingManifest() { // use a small limit on manifest size to prevent merging table.updateProperties().set(TableProperties.MANIFEST_TARGET_SIZE_BYTES, "10").commit(); - Assert.assertEquals("Last sequence number should be 0", 0, readMetadata().lastSequenceNumber()); - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); + assertThat(readMetadata().lastSequenceNumber()).isEqualTo(0); Snapshot snap = commit(table, table.newAppend().appendFile(FILE_A).appendFile(FILE_B), branch); @@ -762,8 +715,7 @@ public void testMergeSizeTargetWithExistingManifest() { TableMetadata base = readMetadata(); long baseId = snap.snapshotId(); - Assert.assertEquals( - "Should create 1 manifest for initial write", 1, snap.allManifests(table.io()).size()); + assertThat(snap.allManifests(table.io())).hasSize(1); ManifestFile initialManifest = snap.allManifests(table.io()).get(0); validateManifest( initialManifest, @@ -782,13 +734,9 @@ public void testMergeSizeTargetWithExistingManifest() { V1Assert.assertEquals( "Table should end with last-sequence-number 0", 0, readMetadata().lastSequenceNumber()); - Assert.assertEquals( - "Should contain 2 unmerged manifests after second write", - 2, - committed.allManifests(table.io()).size()); + assertThat(committed.allManifests(table.io())).hasSize(2); ManifestFile newManifest = committed.allManifests(table.io()).get(0); - Assert.assertNotEquals( - "Should not contain manifest from initial write", initialManifest, newManifest); + assertThat(newManifest).isNotEqualTo(initialManifest); long pendingId = committed.snapshotId(); 
validateManifest( @@ -808,7 +756,7 @@ public void testMergeSizeTargetWithExistingManifest() { statuses(Status.ADDED, Status.ADDED)); } - @Test + @TestTemplate public void testChangedPartitionSpec() { Snapshot snap = commit(table, table.newAppend().appendFile(FILE_A).appendFile(FILE_B), branch); @@ -816,8 +764,7 @@ public void testChangedPartitionSpec() { validateSnapshot(null, snap, 1, FILE_A, FILE_B); TableMetadata base = readMetadata(); - Assert.assertEquals( - "Should create 1 manifest for initial write", 1, snap.allManifests(table.io()).size()); + assertThat(snap.allManifests(table.io())).hasSize(1); ManifestFile initialManifest = snap.allManifests(table.io()).get(0); validateManifest( initialManifest, @@ -856,8 +803,7 @@ public void testChangedPartitionSpec() { V1Assert.assertEquals( "Table should end with last-sequence-number 0", 0, readMetadata().lastSequenceNumber()); - Assert.assertEquals( - "Should use 2 manifest files", 2, lastSnapshot.allManifests(table.io()).size()); + assertThat(lastSnapshot.allManifests(table.io())).hasSize(2); // new manifest comes first validateManifest( @@ -868,13 +814,13 @@ public void testChangedPartitionSpec() { files(newFileY), statuses(Status.ADDED)); - Assert.assertEquals( - "Second manifest should be the initial manifest with the old spec", - initialManifest, - lastSnapshot.allManifests(table.io()).get(1)); + assertThat(lastSnapshot.allManifests(table.io())) + .as("Second manifest should be the initial manifest with the old spec") + .element(1) + .isEqualTo(initialManifest); } - @Test + @TestTemplate public void testChangedPartitionSpecMergeExisting() { Snapshot snap1 = commit(table, table.newAppend().appendFile(FILE_A), branch); @@ -888,7 +834,7 @@ public void testChangedPartitionSpecMergeExisting() { validateSnapshot(snap1, snap2, 2, FILE_B); TableMetadata base = readMetadata(); - Assert.assertEquals("Should contain 2 manifests", 2, snap2.allManifests(table.io()).size()); + 
assertThat(snap2.allManifests(table.io())).hasSize(2); ManifestFile manifest = snap2.allManifests(table.io()).get(0); // build the new spec using the table's schema, which uses fresh IDs @@ -919,11 +865,8 @@ public void testChangedPartitionSpecMergeExisting() { V1Assert.assertEquals( "Table should end with last-sequence-number 0", 0, readMetadata().lastSequenceNumber()); - Assert.assertEquals( - "Should use 2 manifest files", 2, lastSnapshot.allManifests(table.io()).size()); - Assert.assertFalse( - "First manifest should not be in the new snapshot", - lastSnapshot.allManifests(table.io()).contains(manifest)); + assertThat(lastSnapshot.allManifests(table.io())).hasSize(2); + assertThat(lastSnapshot.allManifests(table.io())).doesNotContain(manifest); validateManifest( lastSnapshot.allManifests(table.io()).get(0), @@ -941,11 +884,11 @@ public void testChangedPartitionSpecMergeExisting() { statuses(Status.EXISTING, Status.EXISTING)); } - @Test + @TestTemplate public void testFailure() { // merge all manifests for this test table.updateProperties().set("commit.manifest.min-count-to-merge", "1").commit(); - Assert.assertEquals("Last sequence number should be 0", 0, readMetadata().lastSequenceNumber()); + assertThat(readMetadata().lastSequenceNumber()).isEqualTo(0); Snapshot snap = commit(table, table.newAppend().appendFile(FILE_A), branch); @@ -969,16 +912,16 @@ public void testFailure() { AppendFiles append = table.newAppend().appendFile(FILE_B); Snapshot pending = apply(append, branch); - Assert.assertEquals("Should merge to 1 manifest", 1, pending.allManifests(table.io()).size()); + assertThat(pending.allManifests(table.io())).hasSize(1); ManifestFile newManifest = pending.allManifests(table.io()).get(0); - Assert.assertTrue("Should create new manifest", new File(newManifest.path()).exists()); + assertThat(new File(newManifest.path())).exists(); validateManifest( newManifest, ids(pending.snapshotId(), baseId), concat(files(FILE_B), files(initialManifest))); - 
Assertions.assertThatThrownBy(() -> commit(table, append, branch)) + assertThatThrownBy(() -> commit(table, append, branch)) .isInstanceOf(CommitFailedException.class) .hasMessage("Injected failure"); @@ -986,10 +929,7 @@ public void testFailure() { "Last sequence number should be 1", 1, readMetadata().lastSequenceNumber()); V1Assert.assertEquals( "Table should end with last-sequence-number 0", 0, readMetadata().lastSequenceNumber()); - Assert.assertEquals( - "Should only contain 1 manifest file", - 1, - latestSnapshot(table, branch).allManifests(table.io()).size()); + assertThat(latestSnapshot(table, branch).allManifests(table.io())).hasSize(1); validateManifest( latestSnapshot(table, branch).allManifests(table.io()).get(0), @@ -999,10 +939,10 @@ public void testFailure() { files(initialManifest), statuses(Status.ADDED)); - Assert.assertFalse("Should clean up new manifest", new File(newManifest.path()).exists()); + assertThat(new File(newManifest.path())).doesNotExist(); } - @Test + @TestTemplate public void testAppendManifestCleanup() throws IOException { // inject 5 failures TestTables.TestTableOperations ops = table.ops(); @@ -1012,14 +952,14 @@ public void testAppendManifestCleanup() throws IOException { AppendFiles append = table.newAppend().appendManifest(manifest); Snapshot pending = apply(append, branch); ManifestFile newManifest = pending.allManifests(table.io()).get(0); - Assert.assertTrue("Should create new manifest", new File(newManifest.path()).exists()); + assertThat(new File(newManifest.path())).exists(); if (formatVersion == 1) { - Assertions.assertThat(newManifest.path()).isNotEqualTo(manifest.path()); + assertThat(newManifest.path()).isNotEqualTo(manifest.path()); } else { - Assertions.assertThat(newManifest.path()).isEqualTo(manifest.path()); + assertThat(newManifest.path()).isEqualTo(manifest.path()); } - Assertions.assertThatThrownBy(() -> commit(table, append, branch)) + assertThatThrownBy(() -> commit(table, append, branch)) 
.isInstanceOf(CommitFailedException.class) .hasMessage("Injected failure"); V2Assert.assertEquals( @@ -1028,18 +968,18 @@ public void testAppendManifestCleanup() throws IOException { "Table should end with last-sequence-number 0", 0, readMetadata().lastSequenceNumber()); if (formatVersion == 1) { - Assertions.assertThat(new File(newManifest.path())).doesNotExist(); + assertThat(new File(newManifest.path())).doesNotExist(); } else { - Assertions.assertThat(new File(newManifest.path())).exists(); + assertThat(new File(newManifest.path())).exists(); } } - @Test + @TestTemplate public void testRecovery() { // merge all manifests for this test table.updateProperties().set("commit.manifest.min-count-to-merge", "1").commit(); - Assert.assertEquals("Last sequence number should be 0", 0, readMetadata().lastSequenceNumber()); + assertThat(readMetadata().lastSequenceNumber()).isEqualTo(0); Snapshot current = commit(table, table.newAppend().appendFile(FILE_A), branch); @@ -1063,10 +1003,10 @@ public void testRecovery() { AppendFiles append = table.newAppend().appendFile(FILE_B); Snapshot pending = apply(append, branch); - Assert.assertEquals("Should merge to 1 manifest", 1, pending.allManifests(table.io()).size()); + assertThat(pending.allManifests(table.io())).hasSize(1); ManifestFile newManifest = pending.allManifests(table.io()).get(0); - Assert.assertTrue("Should create new manifest", new File(newManifest.path()).exists()); + assertThat(new File(newManifest.path())).exists(); validateManifest( newManifest, ids(pending.snapshotId(), baseId), @@ -1088,14 +1028,10 @@ public void testRecovery() { "Table should end with last-sequence-number 0", 0, readMetadata().lastSequenceNumber()); TableMetadata metadata = readMetadata(); - Assert.assertTrue("Should reuse the new manifest", new File(newManifest.path()).exists()); - Assert.assertEquals( - "Should commit the same new manifest during retry", - Lists.newArrayList(newManifest), - snapshot.allManifests(table.io())); - - 
Assert.assertEquals( - "Should only contain 1 merged manifest file", 1, snapshot.allManifests(table.io()).size()); + assertThat(new File(newManifest.path())).exists(); + assertThat(snapshot.allManifests(table.io())).containsExactly(newManifest); + + assertThat(snapshot.allManifests(table.io())).hasSize(1); ManifestFile manifestFile = snapshot.allManifests(table.io()).get(0); validateManifest( manifestFile, @@ -1106,15 +1042,15 @@ public void testRecovery() { statuses(Status.ADDED, Status.EXISTING)); } - @Test + @TestTemplate public void testAppendManifestWithSnapshotIdInheritance() throws IOException { table.updateProperties().set(TableProperties.SNAPSHOT_ID_INHERITANCE_ENABLED, "true").commit(); - Assert.assertEquals("Last sequence number should be 0", 0, readMetadata().lastSequenceNumber()); - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); + assertThat(readMetadata().lastSequenceNumber()).isEqualTo(0); TableMetadata base = readMetadata(); - Assert.assertNull("Should not have a current snapshot", base.currentSnapshot()); + assertThat(base.currentSnapshot()).isNull(); ManifestFile manifest = writeManifest(FILE_A, FILE_B); Snapshot snapshot = commit(table, table.newAppend().appendManifest(manifest), branch); @@ -1123,9 +1059,9 @@ public void testAppendManifestWithSnapshotIdInheritance() throws IOException { validateSnapshot(null, snapshot, 1, FILE_A, FILE_B); List manifests = snapshot.allManifests(table.io()); - Assert.assertEquals("Should have 1 committed manifest", 1, manifests.size()); + assertThat(manifests).hasSize(1); ManifestFile manifestFile = snapshot.allManifests(table.io()).get(0); - Assertions.assertThat(manifestFile.path()).isEqualTo(manifest.path()); + assertThat(manifestFile.path()).isEqualTo(manifest.path()); validateManifest( manifestFile, dataSeqs(1L, 1L), @@ -1135,33 +1071,22 @@ public void testAppendManifestWithSnapshotIdInheritance() throws IOException { 
statuses(Status.ADDED, Status.ADDED)); // validate that the metadata summary is correct when using appendManifest - Assert.assertEquals( - "Summary metadata should include 2 added files", - "2", - snapshot.summary().get("added-data-files")); - Assert.assertEquals( - "Summary metadata should include 2 added records", - "2", - snapshot.summary().get("added-records")); - Assert.assertEquals( - "Summary metadata should include 2 files in total", - "2", - snapshot.summary().get("total-data-files")); - Assert.assertEquals( - "Summary metadata should include 2 records in total", - "2", - snapshot.summary().get("total-records")); + assertThat(snapshot.summary()) + .containsEntry("added-data-files", "2") + .containsEntry("added-records", "2") + .containsEntry("total-data-files", "2") + .containsEntry("total-records", "2"); } - @Test + @TestTemplate public void testMergedAppendManifestCleanupWithSnapshotIdInheritance() throws IOException { table.updateProperties().set(TableProperties.SNAPSHOT_ID_INHERITANCE_ENABLED, "true").commit(); - Assert.assertEquals("Last sequence number should be 0", 0, readMetadata().lastSequenceNumber()); - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); + assertThat(readMetadata().lastSequenceNumber()).isEqualTo(0); TableMetadata base = readMetadata(); - Assert.assertNull("Should not have a current snapshot", base.currentSnapshot()); + assertThat(base.currentSnapshot()).isNull(); table.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "1").commit(); @@ -1171,7 +1096,7 @@ public void testMergedAppendManifestCleanupWithSnapshotIdInheritance() throws IO long commitId1 = snap1.snapshotId(); validateSnapshot(null, snap1, 1, FILE_A, FILE_B); - Assert.assertEquals("Should have only 1 manifest", 1, snap1.allManifests(table.io()).size()); + assertThat(snap1.allManifests(table.io())).hasSize(1); validateManifest( snap1.allManifests(table.io()).get(0), dataSeqs(1L, 
1L), @@ -1179,8 +1104,7 @@ public void testMergedAppendManifestCleanupWithSnapshotIdInheritance() throws IO ids(commitId1, commitId1), files(FILE_A, FILE_B), statuses(Status.ADDED, Status.ADDED)); - Assert.assertTrue( - "Unmerged append manifest should not be deleted", new File(manifest1.path()).exists()); + assertThat(new File(manifest1.path())).exists(); ManifestFile manifest2 = writeManifestWithName("manifest-file-2.avro", FILE_C, FILE_D); Snapshot snap2 = commit(table, table.newAppend().appendManifest(manifest2), branch); @@ -1192,8 +1116,7 @@ public void testMergedAppendManifestCleanupWithSnapshotIdInheritance() throws IO V1Assert.assertEquals( "Table should end with last-sequence-number 0", 0, readMetadata().lastSequenceNumber()); - Assert.assertEquals( - "Manifests should be merged into 1", 1, snap2.allManifests(table.io()).size()); + assertThat(snap2.allManifests(table.io())).hasSize(1); validateManifest( latestSnapshot(table, branch).allManifests(table.io()).get(0), dataSeqs(2L, 2L, 1L, 1L), @@ -1202,19 +1125,18 @@ public void testMergedAppendManifestCleanupWithSnapshotIdInheritance() throws IO files(FILE_C, FILE_D, FILE_A, FILE_B), statuses(Status.ADDED, Status.ADDED, Status.EXISTING, Status.EXISTING)); - Assert.assertFalse( - "Merged append manifest should be deleted", new File(manifest2.path()).exists()); + assertThat(new File(manifest2.path())).doesNotExist(); } - @Test + @TestTemplate public void testAppendManifestFailureWithSnapshotIdInheritance() throws IOException { table.updateProperties().set(TableProperties.SNAPSHOT_ID_INHERITANCE_ENABLED, "true").commit(); - Assert.assertEquals("Last sequence number should be 0", 0, readMetadata().lastSequenceNumber()); - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); + assertThat(readMetadata().lastSequenceNumber()).isEqualTo(0); TableMetadata base = readMetadata(); - Assert.assertNull("Should not have a current snapshot", 
base.currentSnapshot()); + assertThat(base.currentSnapshot()).isNull(); table.updateProperties().set(TableProperties.COMMIT_NUM_RETRIES, "1").commit(); @@ -1225,40 +1147,40 @@ public void testAppendManifestFailureWithSnapshotIdInheritance() throws IOExcept AppendFiles append = table.newAppend(); append.appendManifest(manifest); - Assertions.assertThatThrownBy(() -> commit(table, append, branch)) + assertThatThrownBy(() -> commit(table, append, branch)) .isInstanceOf(CommitFailedException.class) .hasMessage("Injected failure"); - Assert.assertEquals("Last sequence number should be 0", 0, readMetadata().lastSequenceNumber()); - Assert.assertTrue("Append manifest should not be deleted", new File(manifest.path()).exists()); + assertThat(readMetadata().lastSequenceNumber()).isEqualTo(0); + assertThat(new File(manifest.path())).exists(); } - @Test + @TestTemplate public void testInvalidAppendManifest() throws IOException { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); TableMetadata base = readMetadata(); - Assert.assertNull("Should not have a current snapshot", base.currentSnapshot()); + assertThat(base.currentSnapshot()).isNull(); ManifestFile manifestWithExistingFiles = writeManifest("manifest-file-1.avro", manifestEntry(Status.EXISTING, null, FILE_A)); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit(table, table.newAppend().appendManifest(manifestWithExistingFiles), branch)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot append manifest with existing files"); - Assert.assertEquals("Last sequence number should be 0", 0, readMetadata().lastSequenceNumber()); + assertThat(readMetadata().lastSequenceNumber()).isEqualTo(0); ManifestFile manifestWithDeletedFiles = writeManifest("manifest-file-2.avro", manifestEntry(Status.DELETED, null, FILE_A)); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit(table, 
table.newAppend().appendManifest(manifestWithDeletedFiles), branch)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot append manifest with deleted files"); - Assert.assertEquals("Last sequence number should be 0", 0, readMetadata().lastSequenceNumber()); + assertThat(readMetadata().lastSequenceNumber()).isEqualTo(0); } - @Test + @TestTemplate public void testUpdatePartitionSpecFieldIdsForV1Table() { TableMetadata base = readMetadata(); @@ -1273,35 +1195,35 @@ public void testUpdatePartitionSpecFieldIdsForV1Table() { // commit the new partition spec to the table manually table.ops().commit(base, base.updatePartitionSpec(newSpec)); - Assert.assertEquals("Last sequence number should be 0", 0, base.lastSequenceNumber()); + assertThat(base.lastSequenceNumber()).isEqualTo(0); List partitionSpecs = table.ops().current().specs(); PartitionSpec partitionSpec = partitionSpecs.get(0); - Assert.assertEquals(1000, partitionSpec.lastAssignedFieldId()); + assertThat(partitionSpec.lastAssignedFieldId()).isEqualTo(1000); Types.StructType structType = partitionSpec.partitionType(); List fields = structType.fields(); - Assert.assertEquals(1, fields.size()); - Assert.assertEquals("data_bucket", fields.get(0).name()); - Assert.assertEquals(1000, fields.get(0).fieldId()); + assertThat(fields).hasSize(1); + assertThat(fields.get(0).name()).isEqualTo("data_bucket"); + assertThat(fields.get(0).fieldId()).isEqualTo(1000); partitionSpec = partitionSpecs.get(1); - Assert.assertEquals(1003, partitionSpec.lastAssignedFieldId()); + assertThat(partitionSpec.lastAssignedFieldId()).isEqualTo(1003); structType = partitionSpec.partitionType(); fields = structType.fields(); - Assert.assertEquals(4, fields.size()); - Assert.assertEquals("id_bucket", fields.get(0).name()); - Assert.assertEquals(1000, fields.get(0).fieldId()); - Assert.assertEquals("data", fields.get(1).name()); - Assert.assertEquals(1001, fields.get(1).fieldId()); - Assert.assertEquals("data_bucket", 
fields.get(2).name()); - Assert.assertEquals(1002, fields.get(2).fieldId()); - Assert.assertEquals("data_partition", fields.get(3).name()); - Assert.assertEquals(1003, fields.get(3).fieldId()); + assertThat(fields).hasSize(4); + assertThat(fields.get(0).name()).isEqualTo("id_bucket"); + assertThat(fields.get(0).fieldId()).isEqualTo(1000); + assertThat(fields.get(1).name()).isEqualTo("data"); + assertThat(fields.get(1).fieldId()).isEqualTo(1001); + assertThat(fields.get(2).name()).isEqualTo("data_bucket"); + assertThat(fields.get(2).fieldId()).isEqualTo(1002); + assertThat(fields.get(3).name()).isEqualTo("data_partition"); + assertThat(fields.get(3).fieldId()).isEqualTo(1003); } - @Test + @TestTemplate public void testManifestEntryFieldIdsForChangedPartitionSpecForV1Table() { Snapshot snap = commit(table, table.newAppend().appendFile(FILE_A), branch); @@ -1309,8 +1231,7 @@ public void testManifestEntryFieldIdsForChangedPartitionSpecForV1Table() { validateSnapshot(null, snap, 1, FILE_A); TableMetadata base = readMetadata(); - Assert.assertEquals( - "Should create 1 manifest for initial write", 1, snap.allManifests(table.io()).size()); + assertThat(snap.allManifests(table.io())).hasSize(1); ManifestFile initialManifest = snap.allManifests(table.io()).get(0); validateManifest( initialManifest, @@ -1349,8 +1270,7 @@ public void testManifestEntryFieldIdsForChangedPartitionSpecForV1Table() { V1Assert.assertEquals( "Table should end with last-sequence-number 0", 0, readMetadata().lastSequenceNumber()); - Assert.assertEquals( - "Should use 2 manifest files", 2, committedSnapshot.allManifests(table.io()).size()); + assertThat(committedSnapshot.allManifests(table.io())).hasSize(2); // new manifest comes first validateManifest( @@ -1361,10 +1281,10 @@ public void testManifestEntryFieldIdsForChangedPartitionSpecForV1Table() { files(newFile), statuses(Status.ADDED)); - Assert.assertEquals( - "Second manifest should be the initial manifest with the old spec", - initialManifest, - 
committedSnapshot.allManifests(table.io()).get(1)); + assertThat(committedSnapshot.allManifests(table.io())) + .as("Second manifest should be the initial manifest with the old spec") + .element(1) + .isEqualTo(initialManifest); // field ids of manifest entries in two manifests with different specs of the same source field // should be different @@ -1375,11 +1295,11 @@ public void testManifestEntryFieldIdsForChangedPartitionSpecForV1Table() { .next(); Types.NestedField field = ((PartitionData) entry.file().partition()).getPartitionType().fields().get(0); - Assert.assertEquals(1000, field.fieldId()); - Assert.assertEquals("id_bucket", field.name()); + assertThat(field.fieldId()).isEqualTo(1000); + assertThat(field.name()).isEqualTo("id_bucket"); field = ((PartitionData) entry.file().partition()).getPartitionType().fields().get(1); - Assert.assertEquals(1001, field.fieldId()); - Assert.assertEquals("data_bucket", field.name()); + assertThat(field.fieldId()).isEqualTo(1001); + assertThat(field.name()).isEqualTo("data_bucket"); entry = ManifestFiles.read(committedSnapshot.allManifests(table.io()).get(1), FILE_IO) @@ -1387,11 +1307,11 @@ public void testManifestEntryFieldIdsForChangedPartitionSpecForV1Table() { .iterator() .next(); field = ((PartitionData) entry.file().partition()).getPartitionType().fields().get(0); - Assert.assertEquals(1000, field.fieldId()); - Assert.assertEquals("data_bucket", field.name()); + assertThat(field.fieldId()).isEqualTo(1000); + assertThat(field.name()).isEqualTo("data_bucket"); } - @Test + @TestTemplate public void testDefaultPartitionSummaries() { table.newFastAppend().appendFile(FILE_A).commit(); @@ -1399,23 +1319,14 @@ public void testDefaultPartitionSummaries() { table.currentSnapshot().summary().keySet().stream() .filter(key -> key.startsWith(SnapshotSummary.CHANGED_PARTITION_PREFIX)) .collect(Collectors.toSet()); - Assert.assertEquals( - "Should include no partition summaries by default", 0, partitionSummaryKeys.size()); - - String 
summariesIncluded = - table - .currentSnapshot() - .summary() - .getOrDefault(SnapshotSummary.PARTITION_SUMMARY_PROP, "false"); - Assert.assertEquals( - "Should not set partition-summaries-included to true", "false", summariesIncluded); - - String changedPartitions = - table.currentSnapshot().summary().get(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP); - Assert.assertEquals("Should set changed partition count", "1", changedPartitions); + assertThat(partitionSummaryKeys).isEmpty(); + + assertThat(table.currentSnapshot().summary()) + .doesNotContainKey(SnapshotSummary.PARTITION_SUMMARY_PROP) + .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "1"); } - @Test + @TestTemplate public void testIncludedPartitionSummaries() { table.updateProperties().set(TableProperties.WRITE_PARTITION_SUMMARY_LIMIT, "1").commit(); @@ -1425,32 +1336,17 @@ public void testIncludedPartitionSummaries() { table.currentSnapshot().summary().keySet().stream() .filter(key -> key.startsWith(SnapshotSummary.CHANGED_PARTITION_PREFIX)) .collect(Collectors.toSet()); - Assert.assertEquals("Should include a partition summary", 1, partitionSummaryKeys.size()); - - String summariesIncluded = - table - .currentSnapshot() - .summary() - .getOrDefault(SnapshotSummary.PARTITION_SUMMARY_PROP, "false"); - Assert.assertEquals( - "Should set partition-summaries-included to true", "true", summariesIncluded); - - String changedPartitions = - table.currentSnapshot().summary().get(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP); - Assert.assertEquals("Should set changed partition count", "1", changedPartitions); - - String partitionSummary = - table - .currentSnapshot() - .summary() - .get(SnapshotSummary.CHANGED_PARTITION_PREFIX + "data_bucket=0"); - Assert.assertEquals( - "Summary should include 1 file with 1 record that is 10 bytes", - "added-data-files=1,added-records=1,added-files-size=10", - partitionSummary); + assertThat(partitionSummaryKeys).hasSize(1); + + 
assertThat(table.currentSnapshot().summary()) + .containsEntry(SnapshotSummary.PARTITION_SUMMARY_PROP, "true") + .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "1") + .containsEntry( + SnapshotSummary.CHANGED_PARTITION_PREFIX + "data_bucket=0", + "added-data-files=1,added-records=1,added-files-size=10"); } - @Test + @TestTemplate public void testIncludedPartitionSummaryLimit() { table.updateProperties().set(TableProperties.WRITE_PARTITION_SUMMARY_LIMIT, "1").commit(); @@ -1460,19 +1356,10 @@ public void testIncludedPartitionSummaryLimit() { table.currentSnapshot().summary().keySet().stream() .filter(key -> key.startsWith(SnapshotSummary.CHANGED_PARTITION_PREFIX)) .collect(Collectors.toSet()); - Assert.assertEquals( - "Should include no partition summaries, over limit", 0, partitionSummaryKeys.size()); - - String summariesIncluded = - table - .currentSnapshot() - .summary() - .getOrDefault(SnapshotSummary.PARTITION_SUMMARY_PROP, "false"); - Assert.assertEquals( - "Should not set partition-summaries-included to true", "false", summariesIncluded); - - String changedPartitions = - table.currentSnapshot().summary().get(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP); - Assert.assertEquals("Should set changed partition count", "2", changedPartitions); + assertThat(partitionSummaryKeys).isEmpty(); + + assertThat(table.currentSnapshot().summary()) + .doesNotContainKey(SnapshotSummary.PARTITION_SUMMARY_PROP) + .containsEntry(SnapshotSummary.CHANGED_PARTITION_COUNT_PROP, "2"); } } diff --git a/core/src/test/java/org/apache/iceberg/TestOverwrite.java b/core/src/test/java/org/apache/iceberg/TestOverwrite.java index e5665d6714fc..15b5547cc456 100644 --- a/core/src/test/java/org/apache/iceberg/TestOverwrite.java +++ b/core/src/test/java/org/apache/iceberg/TestOverwrite.java @@ -24,24 +24,26 @@ import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; import static 
org.apache.iceberg.util.SnapshotUtil.latestSnapshot; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.List; import org.apache.iceberg.ManifestEntry.Status; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.types.Types; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class TestOverwrite extends TableTestBase { +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(ParameterizedTestExtension.class) +public class TestOverwrite extends TestBase { private static final Schema DATE_SCHEMA = new Schema( required(1, "id", Types.LongType.get()), @@ -104,21 +106,16 @@ public class TestOverwrite extends TableTestBase { )) .build(); - private final String branch; - - @Parameterized.Parameters(name = "formatVersion = {0}, branch = {1}") - public static Object[] parameters() { - return new Object[][] { - new Object[] {1, "main"}, - new Object[] {1, "testBranch"}, - new Object[] {2, "main"}, - new Object[] {2, "testBranch"} - }; - } + @Parameter(index = 1) + private String branch; - public TestOverwrite(int formatVersion, String branch) { - super(formatVersion); - this.branch = branch; + @Parameters(name = "formatVersion = {0}, branch = {1}") + protected static List parameters() { + return Arrays.asList( + new Object[] {1, "main"}, + new Object[] {1, "testBranch"}, + new Object[] {2, "main"}, + new Object[] {2, "testBranch"}); } private static 
ByteBuffer longToBuffer(long value) { @@ -127,10 +124,10 @@ private static ByteBuffer longToBuffer(long value) { private Table table = null; - @Before + @BeforeEach public void createTestTable() throws IOException { - File tableDir = temp.newFolder(); - Assert.assertTrue(tableDir.delete()); + File tableDir = Files.createTempDirectory(temp, "junit").toFile(); + assertThat(tableDir.delete()).isTrue(); this.table = TestTables.create(tableDir, TABLE_NAME, DATE_SCHEMA, PARTITION_BY_DATE, formatVersion); @@ -138,7 +135,7 @@ public void createTestTable() throws IOException { commit(table, table.newAppend().appendFile(FILE_0_TO_4).appendFile(FILE_5_TO_9), branch); } - @Test + @TestTemplate public void testOverwriteWithoutAppend() { TableMetadata base = TestTables.readMetadata(TABLE_NAME); long baseId = latestSnapshot(base, branch).snapshotId(); @@ -147,11 +144,8 @@ public void testOverwriteWithoutAppend() { long overwriteId = latestSnapshot(table, branch).snapshotId(); - Assert.assertNotEquals("Should create a new snapshot", baseId, overwriteId); - Assert.assertEquals( - "Table should have one manifest", - 1, - latestSnapshot(table, branch).allManifests(table.io()).size()); + assertThat(overwriteId).isNotEqualTo(baseId); + assertThat(latestSnapshot(table, branch).allManifests(table.io())).hasSize(1); validateManifestEntries( latestSnapshot(table, branch).allManifests(table.io()).get(0), @@ -160,7 +154,7 @@ public void testOverwriteWithoutAppend() { statuses(Status.DELETED, Status.EXISTING)); } - @Test + @TestTemplate public void testOverwriteFailsDelete() { TableMetadata base = TestTables.readMetadata(TABLE_NAME); long baseId = @@ -171,15 +165,14 @@ public void testOverwriteFailsDelete() { .newOverwrite() .overwriteByRowFilter(and(equal("date", "2018-06-09"), lessThan("id", 9))); - Assertions.assertThatThrownBy(() -> commit(table, overwrite, branch)) + assertThatThrownBy(() -> commit(table, overwrite, branch)) .isInstanceOf(ValidationException.class) 
.hasMessageStartingWith("Cannot delete file where some, but not all, rows match filter"); - Assert.assertEquals( - "Should not create a new snapshot", baseId, latestSnapshot(base, branch).snapshotId()); + assertThat(latestSnapshot(base, branch).snapshotId()).isEqualTo(baseId); } - @Test + @TestTemplate public void testOverwriteWithAppendOutsideOfDelete() { TableMetadata base = TestTables.readMetadata(TABLE_NAME); Snapshot latestSnapshot = latestSnapshot(base, branch); @@ -195,11 +188,8 @@ public void testOverwriteWithAppendOutsideOfDelete() { long overwriteId = latestSnapshot(table, branch).snapshotId(); - Assert.assertNotEquals("Should create a new snapshot", baseId, overwriteId); - Assert.assertEquals( - "Table should have 2 manifests", - 2, - latestSnapshot(table, branch).allManifests(table.io()).size()); + assertThat(overwriteId).isNotEqualTo(baseId); + assertThat(latestSnapshot(table, branch).allManifests(table.io())).hasSize(2); // manifest is not merged because it is less than the minimum validateManifestEntries( @@ -215,7 +205,7 @@ public void testOverwriteWithAppendOutsideOfDelete() { statuses(Status.DELETED, Status.EXISTING)); } - @Test + @TestTemplate public void testOverwriteWithMergedAppendOutsideOfDelete() { // ensure the overwrite results in a merge table.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "1").commit(); @@ -234,11 +224,8 @@ public void testOverwriteWithMergedAppendOutsideOfDelete() { long overwriteId = latestSnapshot(table, branch).snapshotId(); - Assert.assertNotEquals("Should create a new snapshot", baseId, overwriteId); - Assert.assertEquals( - "Table should have one merged manifest", - 1, - latestSnapshot(table, branch).allManifests(table.io()).size()); + assertThat(overwriteId).isNotEqualTo(baseId); + assertThat(latestSnapshot(table, branch).allManifests(table.io())).hasSize(1); validateManifestEntries( latestSnapshot(table, branch).allManifests(table.io()).get(0), @@ -247,7 +234,7 @@ public void 
testOverwriteWithMergedAppendOutsideOfDelete() { statuses(Status.ADDED, Status.DELETED, Status.EXISTING)); } - @Test + @TestTemplate public void testValidatedOverwriteWithAppendOutsideOfDelete() { // ensure the overwrite results in a merge table.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "1").commit(); @@ -263,15 +250,14 @@ public void testValidatedOverwriteWithAppendOutsideOfDelete() { .addFile(FILE_10_TO_14) // in 2018-06-09, NOT in 2018-06-08 .validateAddedFilesMatchOverwriteFilter(); - Assertions.assertThatThrownBy(() -> commit(table, overwrite, branch)) + assertThatThrownBy(() -> commit(table, overwrite, branch)) .isInstanceOf(ValidationException.class) .hasMessageStartingWith("Cannot append file with rows that do not match filter"); - Assert.assertEquals( - "Should not create a new snapshot", baseId, latestSnapshot(table, branch).snapshotId()); + assertThat(latestSnapshot(table, branch).snapshotId()).isEqualTo(baseId); } - @Test + @TestTemplate public void testValidatedOverwriteWithAppendOutsideOfDeleteMetrics() { TableMetadata base = TestTables.readMetadata(TABLE_NAME); long baseId = @@ -284,15 +270,14 @@ public void testValidatedOverwriteWithAppendOutsideOfDeleteMetrics() { .addFile(FILE_10_TO_14) // in 2018-06-09 matches, but IDs are outside range .validateAddedFilesMatchOverwriteFilter(); - Assertions.assertThatThrownBy(() -> commit(table, overwrite, branch)) + assertThatThrownBy(() -> commit(table, overwrite, branch)) .isInstanceOf(ValidationException.class) .hasMessageStartingWith("Cannot append file with rows that do not match filter"); - Assert.assertEquals( - "Should not create a new snapshot", baseId, latestSnapshot(base, branch).snapshotId()); + assertThat(latestSnapshot(base, branch).snapshotId()).isEqualTo(baseId); } - @Test + @TestTemplate public void testValidatedOverwriteWithAppendSuccess() { TableMetadata base = TestTables.readMetadata(TABLE_NAME); long baseId = @@ -305,11 +290,10 @@ public void 
testValidatedOverwriteWithAppendSuccess() { .addFile(FILE_10_TO_14) // in 2018-06-09 matches and IDs are inside range .validateAddedFilesMatchOverwriteFilter(); - Assertions.assertThatThrownBy(() -> commit(table, overwrite, branch)) + assertThatThrownBy(() -> commit(table, overwrite, branch)) .isInstanceOf(ValidationException.class) .hasMessageStartingWith("Cannot append file with rows that do not match filter"); - Assert.assertEquals( - "Should not create a new snapshot", baseId, latestSnapshot(base, branch).snapshotId()); + assertThat(latestSnapshot(base, branch).snapshotId()).isEqualTo(baseId); } } diff --git a/core/src/test/java/org/apache/iceberg/TestOverwriteWithValidation.java b/core/src/test/java/org/apache/iceberg/TestOverwriteWithValidation.java index a4ccb4018c9a..7cb50df919de 100644 --- a/core/src/test/java/org/apache/iceberg/TestOverwriteWithValidation.java +++ b/core/src/test/java/org/apache/iceberg/TestOverwriteWithValidation.java @@ -26,26 +26,28 @@ import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; import static org.apache.iceberg.util.SnapshotUtil.latestSnapshot; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.assertj.core.api.Assumptions.assumeThat; import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.List; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.types.Types; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.Before; -import org.junit.Test; -import 
org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; -@RunWith(Parameterized.class) -public class TestOverwriteWithValidation extends TableTestBase { +@ExtendWith(ParameterizedTestExtension.class) +public class TestOverwriteWithValidation extends TestBase { private static final String TABLE_NAME = "overwrite_table"; @@ -179,21 +181,16 @@ public class TestOverwriteWithValidation extends TableTestBase { private static final Expression EXPRESSION_DAY_2_ANOTHER_ID_RANGE = greaterThanOrEqual("id", 10L); - private final String branch; + @Parameter(index = 1) + private String branch; - @Parameterized.Parameters(name = "formatVersion = {0}, branch = {1}") - public static Object[] parameters() { - return new Object[][] { - new Object[] {1, "main"}, - new Object[] {1, "testBranch"}, - new Object[] {2, "main"}, - new Object[] {2, "testBranch"} - }; - } - - public TestOverwriteWithValidation(int formatVersion, String branch) { - super(formatVersion); - this.branch = branch; + @Parameters(name = "formatVersion = {0}, branch = {1}") + protected static List parameters() { + return Arrays.asList( + new Object[] {1, "main"}, + new Object[] {1, "testBranch"}, + new Object[] {2, "main"}, + new Object[] {2, "testBranch"}); } private static ByteBuffer longToBuffer(long value) { @@ -202,26 +199,26 @@ private static ByteBuffer longToBuffer(long value) { private Table table = null; - @Before + @BeforeEach public void before() throws IOException { - File tableDir = temp.newFolder(); - Assert.assertTrue(tableDir.delete()); + File tableDir = Files.createTempDirectory(temp, "junit").toFile(); + assertThat(tableDir.delete()).isTrue(); this.table = TestTables.create(tableDir, TABLE_NAME, DATE_SCHEMA, PARTITION_SPEC, formatVersion); } - @Test + @TestTemplate public void testOverwriteEmptyTableNotValidated() { - Assert.assertNull("Should be empty 
table", latestSnapshot(table, branch)); + assertThat(latestSnapshot(table, branch)).isNull(); commit(table, table.newOverwrite().addFile(FILE_DAY_2_MODIFIED), branch); validateBranchFiles(table, branch, FILE_DAY_2_MODIFIED); } - @Test + @TestTemplate public void testOverwriteEmptyTableStrictValidated() { - Assert.assertNull("Should be empty table", latestSnapshot(table, branch)); + assertThat(latestSnapshot(table, branch)).isNull(); commit( table, @@ -235,9 +232,9 @@ public void testOverwriteEmptyTableStrictValidated() { validateBranchFiles(table, branch, FILE_DAY_2_MODIFIED); } - @Test + @TestTemplate public void testOverwriteEmptyTableValidated() { - Assert.assertNull("Should be empty table", latestSnapshot(table, branch)); + assertThat(latestSnapshot(table, branch)).isNull(); commit( table, @@ -251,7 +248,7 @@ public void testOverwriteEmptyTableValidated() { validateBranchFiles(table, branch, FILE_DAY_2_MODIFIED); } - @Test + @TestTemplate public void testOverwriteTableNotValidated() { commit(table, table.newAppend().appendFile(FILE_DAY_1).appendFile(FILE_DAY_2), branch); @@ -263,7 +260,7 @@ public void testOverwriteTableNotValidated() { validateBranchFiles(table, branch, FILE_DAY_1, FILE_DAY_2_MODIFIED); } - @Test + @TestTemplate public void testOverwriteTableStrictValidated() { commit(table, table.newAppend().appendFile(FILE_DAY_1).appendFile(FILE_DAY_2), branch); @@ -284,7 +281,7 @@ public void testOverwriteTableStrictValidated() { validateBranchFiles(table, branch, FILE_DAY_1, FILE_DAY_2_MODIFIED); } - @Test + @TestTemplate public void testOverwriteTableValidated() { commit(table, table.newAppend().appendFile(FILE_DAY_1).appendFile(FILE_DAY_2), branch); @@ -305,7 +302,7 @@ public void testOverwriteTableValidated() { validateBranchFiles(table, branch, FILE_DAY_1, FILE_DAY_2_MODIFIED); } - @Test + @TestTemplate public void testOverwriteCompatibleAdditionNotValidated() { commit(table, table.newAppend().appendFile(FILE_DAY_2), branch); @@ -321,7 +318,7 @@ public 
void testOverwriteCompatibleAdditionNotValidated() { validateBranchFiles(table, branch, FILE_DAY_1, FILE_DAY_2_MODIFIED); } - @Test + @TestTemplate public void testOverwriteCompatibleAdditionStrictValidated() { commit(table, table.newAppend().appendFile(FILE_DAY_2), branch); @@ -340,17 +337,14 @@ public void testOverwriteCompatibleAdditionStrictValidated() { commit(table, table.newAppend().appendFile(FILE_DAY_1), branch); long committedSnapshotId = latestSnapshot(table, branch).snapshotId(); - Assertions.assertThatThrownBy(() -> commit(table, overwrite, branch)) + assertThatThrownBy(() -> commit(table, overwrite, branch)) .isInstanceOf(ValidationException.class) .hasMessageStartingWith("Found conflicting files"); - Assert.assertEquals( - "Should not create a new snapshot", - committedSnapshotId, - latestSnapshot(table, branch).snapshotId()); + assertThat(latestSnapshot(table, branch).snapshotId()).isEqualTo(committedSnapshotId); } - @Test + @TestTemplate public void testOverwriteCompatibleAdditionValidated() { commit(table, table.newAppend().appendFile(FILE_DAY_2), branch); @@ -373,7 +367,7 @@ public void testOverwriteCompatibleAdditionValidated() { validateBranchFiles(table, branch, FILE_DAY_1, FILE_DAY_2_MODIFIED); } - @Test + @TestTemplate public void testOverwriteCompatibleDeletionValidated() { commit(table, table.newAppend().appendFile(FILE_DAY_1).appendFile(FILE_DAY_2), branch); @@ -396,7 +390,7 @@ public void testOverwriteCompatibleDeletionValidated() { validateBranchFiles(table, branch, FILE_DAY_2_MODIFIED); } - @Test + @TestTemplate public void testOverwriteIncompatibleAdditionValidated() { commit(table, table.newAppend().appendFile(FILE_DAY_1), branch); @@ -414,17 +408,14 @@ public void testOverwriteIncompatibleAdditionValidated() { commit(table, table.newAppend().appendFile(FILE_DAY_2), branch); long committedSnapshotId = latestSnapshot(table, branch).snapshotId(); - Assertions.assertThatThrownBy(() -> commit(table, overwrite, branch)) + 
assertThatThrownBy(() -> commit(table, overwrite, branch)) .isInstanceOf(ValidationException.class) .hasMessageStartingWith("Found conflicting files"); - Assert.assertEquals( - "Should not create a new snapshot", - committedSnapshotId, - latestSnapshot(table, branch).snapshotId()); + assertThat(latestSnapshot(table, branch).snapshotId()).isEqualTo(committedSnapshotId); } - @Test + @TestTemplate public void testOverwriteIncompatibleDeletionValidated() { commit(table, table.newAppend().appendFile(FILE_DAY_1).appendFile(FILE_DAY_2), branch); @@ -443,17 +434,14 @@ public void testOverwriteIncompatibleDeletionValidated() { commit(table, table.newDelete().deleteFile(FILE_DAY_2), branch); long committedSnapshotId = latestSnapshot(table, branch).snapshotId(); - Assertions.assertThatThrownBy(() -> commit(table, overwrite, branch)) + assertThatThrownBy(() -> commit(table, overwrite, branch)) .isInstanceOf(ValidationException.class) .hasMessageStartingWith("Missing required files to delete:"); - Assert.assertEquals( - "Should not create a new snapshot", - committedSnapshotId, - latestSnapshot(table, branch).snapshotId()); + assertThat(latestSnapshot(table, branch).snapshotId()).isEqualTo(committedSnapshotId); } - @Test + @TestTemplate public void testOverwriteCompatibleRewriteAllowed() { commit(table, table.newAppend().appendFile(FILE_DAY_1).appendFile(FILE_DAY_2), branch); @@ -477,13 +465,10 @@ public void testOverwriteCompatibleRewriteAllowed() { commit(table, overwrite, branch); - Assert.assertNotEquals( - "Should successfully commit", - committedSnapshotId, - latestSnapshot(table, branch).snapshotId()); + assertThat(latestSnapshot(table, branch).snapshotId()).isNotEqualTo(committedSnapshotId); } - @Test + @TestTemplate public void testOverwriteCompatibleExpirationAdditionValidated() { commit(table, table.newAppend().appendFile(FILE_DAY_2), branch); // id 1 @@ -508,7 +493,7 @@ public void testOverwriteCompatibleExpirationAdditionValidated() { validateBranchFiles(table, 
branch, FILE_DAY_1, FILE_DAY_2_MODIFIED); } - @Test + @TestTemplate public void testOverwriteCompatibleExpirationDeletionValidated() { commit(table, table.newAppend().appendFile(FILE_DAY_1).appendFile(FILE_DAY_2), branch); // id 1 @@ -533,7 +518,7 @@ public void testOverwriteCompatibleExpirationDeletionValidated() { validateBranchFiles(table, branch, FILE_DAY_2_MODIFIED); } - @Test + @TestTemplate public void testOverwriteIncompatibleExpirationValidated() { commit(table, table.newAppend().appendFile(FILE_DAY_1), branch); // id 1 @@ -553,19 +538,16 @@ public void testOverwriteIncompatibleExpirationValidated() { table.expireSnapshots().expireSnapshotId(2L).commit(); long committedSnapshotId = latestSnapshot(table, branch).snapshotId(); - Assertions.assertThatThrownBy(() -> commit(table, overwrite, branch)) + assertThatThrownBy(() -> commit(table, overwrite, branch)) .isInstanceOf(ValidationException.class) .hasMessageStartingWith("Cannot determine history"); - Assert.assertEquals( - "Should not create a new snapshot", - committedSnapshotId, - latestSnapshot(table, branch).snapshotId()); + assertThat(latestSnapshot(table, branch).snapshotId()).isEqualTo(committedSnapshotId); } - @Test + @TestTemplate public void testOverwriteIncompatibleBaseExpirationEmptyTableValidated() { - Assert.assertNull("Should be empty table", latestSnapshot(table, branch)); + assertThat(latestSnapshot(table, branch)).isNull(); OverwriteFiles overwrite = table @@ -581,19 +563,16 @@ public void testOverwriteIncompatibleBaseExpirationEmptyTableValidated() { table.expireSnapshots().expireSnapshotId(1L).commit(); long committedSnapshotId = latestSnapshot(table, branch).snapshotId(); - Assertions.assertThatThrownBy(() -> commit(table, overwrite, branch)) + assertThatThrownBy(() -> commit(table, overwrite, branch)) .isInstanceOf(ValidationException.class) .hasMessageStartingWith("Cannot determine history"); - Assert.assertEquals( - "Should not create a new snapshot", - committedSnapshotId, - 
latestSnapshot(table, branch).snapshotId()); + assertThat(latestSnapshot(table, branch).snapshotId()).isEqualTo(committedSnapshotId); } - @Test + @TestTemplate public void testOverwriteAnotherRangeValidated() { - Assert.assertNull("Should be empty table", latestSnapshot(table, branch)); + assertThat(latestSnapshot(table, branch)).isNull(); OverwriteFiles overwrite = table @@ -609,9 +588,9 @@ public void testOverwriteAnotherRangeValidated() { validateBranchFiles(table, branch, FILE_DAY_1, FILE_DAY_2_MODIFIED); } - @Test + @TestTemplate public void testOverwriteAnotherRangeWithinPartitionValidated() { - Assert.assertNull("Should be empty table", latestSnapshot(table, branch)); + assertThat(latestSnapshot(table, branch)).isNull(); Expression conflictDetectionFilter = and(EXPRESSION_DAY_2, EXPRESSION_DAY_2_ID_RANGE); OverwriteFiles overwrite = @@ -628,9 +607,9 @@ public void testOverwriteAnotherRangeWithinPartitionValidated() { validateBranchFiles(table, branch, FILE_DAY_2_ANOTHER_RANGE, FILE_DAY_2_MODIFIED); } - @Test + @TestTemplate public void testTransactionCompatibleAdditionValidated() { - Assert.assertNull("Should be empty table", latestSnapshot(table, branch)); + assertThat(latestSnapshot(table, branch)).isNull(); commit(table, table.newAppend().appendFile(FILE_DAY_2), branch); @@ -653,9 +632,9 @@ public void testTransactionCompatibleAdditionValidated() { validateBranchFiles(table, branch, FILE_DAY_1, FILE_DAY_2_MODIFIED); } - @Test + @TestTemplate public void testTransactionIncompatibleAdditionValidated() { - Assert.assertNull("Should be empty table", latestSnapshot(table, branch)); + assertThat(latestSnapshot(table, branch)).isNull(); Transaction txn = table.newTransaction(); @@ -672,21 +651,18 @@ public void testTransactionIncompatibleAdditionValidated() { commit(table, overwrite, branch); - Assertions.assertThatThrownBy(txn::commitTransaction) + assertThatThrownBy(txn::commitTransaction) .isInstanceOf(ValidationException.class) .hasMessageStartingWith("Found 
conflicting files"); - Assert.assertEquals( - "Should not create a new snapshot", - committedSnapshotId, - latestSnapshot(table, branch).snapshotId()); + assertThat(latestSnapshot(table, branch).snapshotId()).isEqualTo(committedSnapshotId); } - @Test + @TestTemplate public void testConcurrentConflictingPositionDeletes() { - Assume.assumeTrue(formatVersion == 2); + assumeThat(formatVersion).isEqualTo(2); - Assert.assertNull("Should be empty table", latestSnapshot(table, branch)); + assertThat(latestSnapshot(table, branch)).isNull(); commit(table, table.newAppend().appendFile(FILE_DAY_1).appendFile(FILE_DAY_2), branch); @@ -704,16 +680,16 @@ public void testConcurrentConflictingPositionDeletes() { commit(table, table.newRowDelta().addDeletes(FILE_DAY_2_POS_DELETES), branch); - Assertions.assertThatThrownBy(() -> commit(table, overwrite, branch)) + assertThatThrownBy(() -> commit(table, overwrite, branch)) .isInstanceOf(ValidationException.class) .hasMessageStartingWith("Cannot commit, found new delete for replaced data file"); } - @Test + @TestTemplate public void testConcurrentConflictingPositionDeletesOverwriteByFilter() { - Assume.assumeTrue(formatVersion == 2); + assumeThat(formatVersion).isEqualTo(2); - Assert.assertNull("Should be empty table", latestSnapshot(table, branch)); + assertThat(latestSnapshot(table, branch)).isNull(); commit(table, table.newAppend().appendFile(FILE_DAY_1).appendFile(FILE_DAY_2), branch); @@ -731,14 +707,14 @@ public void testConcurrentConflictingPositionDeletesOverwriteByFilter() { commit(table, table.newRowDelta().addDeletes(FILE_DAY_2_POS_DELETES), branch); - Assertions.assertThatThrownBy(() -> commit(table, overwrite, branch)) + assertThatThrownBy(() -> commit(table, overwrite, branch)) .isInstanceOf(ValidationException.class) .hasMessageStartingWith("Found new conflicting delete"); } - @Test + @TestTemplate public void testConcurrentConflictingDataFileDeleteOverwriteByFilter() { - Assert.assertNull("Should be empty table", 
latestSnapshot(table, branch)); + assertThat(latestSnapshot(table, branch)).isNull(); commit(table, table.newAppend().appendFile(FILE_DAY_1).appendFile(FILE_DAY_2), branch); @@ -755,14 +731,14 @@ public void testConcurrentConflictingDataFileDeleteOverwriteByFilter() { commit(table, table.newOverwrite().deleteFile(FILE_DAY_2), branch); - Assertions.assertThatThrownBy(() -> commit(table, overwrite, branch)) + assertThatThrownBy(() -> commit(table, overwrite, branch)) .isInstanceOf(ValidationException.class) .hasMessageStartingWith("Found conflicting deleted files"); } - @Test + @TestTemplate public void testConcurrentNonConflictingDataFileDeleteOverwriteByFilter() { - Assert.assertNull("Should be empty table", latestSnapshot(table, branch)); + assertThat(latestSnapshot(table, branch)).isNull(); commit(table, table.newAppend().appendFile(FILE_DAY_1).appendFile(FILE_DAY_2), branch); @@ -784,11 +760,11 @@ public void testConcurrentNonConflictingDataFileDeleteOverwriteByFilter() { validateBranchFiles(table, branch, FILE_DAY_2_MODIFIED); } - @Test + @TestTemplate public void testConcurrentNonConflictingPositionDeletes() { - Assume.assumeTrue(formatVersion == 2); + assumeThat(formatVersion).isEqualTo(2); - Assert.assertNull("Should be empty table", latestSnapshot(table, branch)); + assertThat(latestSnapshot(table, branch)).isNull(); commit(table, table.newAppend().appendFile(FILE_DAY_1).appendFile(FILE_DAY_2), branch); @@ -812,11 +788,11 @@ public void testConcurrentNonConflictingPositionDeletes() { validateBranchDeleteFiles(table, branch, FILE_DAY_1_POS_DELETES); } - @Test + @TestTemplate public void testConcurrentNonConflictingPositionDeletesOverwriteByFilter() { - Assume.assumeTrue(formatVersion == 2); + assumeThat(formatVersion).isEqualTo(2); - Assert.assertNull("Should be empty table", latestSnapshot(table, branch)); + assertThat(latestSnapshot(table, branch)).isNull(); commit(table, table.newAppend().appendFile(FILE_DAY_1).appendFile(FILE_DAY_2), branch); @@ -840,11 
+816,11 @@ public void testConcurrentNonConflictingPositionDeletesOverwriteByFilter() { validateBranchDeleteFiles(table, branch, FILE_DAY_1_POS_DELETES); } - @Test + @TestTemplate public void testConcurrentConflictingEqualityDeletes() { - Assume.assumeTrue(formatVersion == 2); + assumeThat(formatVersion).isEqualTo(2); - Assert.assertNull("Should be empty table", latestSnapshot(table, branch)); + assertThat(latestSnapshot(table, branch)).isNull(); commit(table, table.newAppend().appendFile(FILE_DAY_1).appendFile(FILE_DAY_2), branch); @@ -862,16 +838,16 @@ public void testConcurrentConflictingEqualityDeletes() { commit(table, table.newRowDelta().addDeletes(FILE_DAY_2_EQ_DELETES), branch); - Assertions.assertThatThrownBy(() -> commit(table, overwrite, branch)) + assertThatThrownBy(() -> commit(table, overwrite, branch)) .isInstanceOf(ValidationException.class) .hasMessageStartingWith("Cannot commit, found new delete for replaced data file"); } - @Test + @TestTemplate public void testConcurrentNonConflictingEqualityDeletes() { - Assume.assumeTrue(formatVersion == 2); + assumeThat(formatVersion).isEqualTo(2); - Assert.assertNull("Should be empty table", latestSnapshot(table, branch)); + assertThat(latestSnapshot(table, branch)).isNull(); commit( table, @@ -898,11 +874,11 @@ public void testConcurrentNonConflictingEqualityDeletes() { validateBranchDeleteFiles(table, branch, FILE_DAY_2_ANOTHER_RANGE_EQ_DELETES); } - @Test + @TestTemplate public void testOverwriteByFilterInheritsConflictDetectionFilter() { - Assume.assumeTrue(formatVersion == 2); + assumeThat(formatVersion).isEqualTo(2); - Assert.assertNull("Should be empty table", latestSnapshot(table, branch)); + assertThat(latestSnapshot(table, branch)).isNull(); commit(table, table.newAppend().appendFile(FILE_DAY_1).appendFile(FILE_DAY_2), branch); @@ -926,7 +902,7 @@ public void testOverwriteByFilterInheritsConflictDetectionFilter() { validateBranchDeleteFiles(table, branch, FILE_DAY_1_POS_DELETES); } - @Test + 
@TestTemplate public void testOverwriteCaseSensitivity() { commit(table, table.newAppend().appendFile(FILE_DAY_1).appendFile(FILE_DAY_2), branch); @@ -934,7 +910,7 @@ public void testOverwriteCaseSensitivity() { Expression rowFilter = equal("dAtE", "2018-06-09"); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit( table, @@ -947,7 +923,7 @@ public void testOverwriteCaseSensitivity() { .isInstanceOf(ValidationException.class) .hasMessageStartingWith("Cannot find field 'dAtE'"); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit( table, @@ -962,7 +938,7 @@ public void testOverwriteCaseSensitivity() { .hasMessageStartingWith("Cannot find field 'dAtE'"); // binding should succeed and trigger the validation - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit( table, @@ -977,11 +953,11 @@ public void testOverwriteCaseSensitivity() { .hasMessageStartingWith("Found conflicting files"); } - @Test + @TestTemplate public void testMetadataOnlyDeleteWithPositionDeletes() { - Assume.assumeTrue(formatVersion == 2); + assumeThat(formatVersion).isEqualTo(2); - Assert.assertNull("Should be empty table", latestSnapshot(table, branch)); + assertThat(latestSnapshot(table, branch)).isNull(); commit( table, diff --git a/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java b/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java index 5b868d3d3642..948eda552817 100644 --- a/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java +++ b/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java @@ -22,50 +22,45 @@ import static org.apache.iceberg.ManifestEntry.Status.DELETED; import static org.apache.iceberg.ManifestEntry.Status.EXISTING; import static org.apache.iceberg.util.SnapshotUtil.latestSnapshot; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.assertj.core.api.Assumptions.assumeThat; import java.io.File; +import 
java.util.Arrays; import java.util.Collections; import java.util.List; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.internal.util.collections.Sets; -@RunWith(Parameterized.class) -public class TestRewriteFiles extends TableTestBase { +@ExtendWith(ParameterizedTestExtension.class) +public class TestRewriteFiles extends TestBase { - private final String branch; + @Parameter(index = 1) + private String branch; - @Parameterized.Parameters(name = "formatVersion = {0}, branch = {1}") - public static Object[] parameters() { - return new Object[][] { - new Object[] {1, "main"}, - new Object[] {1, "testBranch"}, - new Object[] {2, "main"}, - new Object[] {2, "testBranch"} - }; + @Parameters(name = "formatVersion = {0}, branch = {1}") + protected static List parameters() { + return Arrays.asList( + new Object[] {1, "main"}, + new Object[] {1, "testBranch"}, + new Object[] {2, "main"}, + new Object[] {2, "testBranch"}); } - public TestRewriteFiles(int formatVersion, String branch) { - super(formatVersion); - this.branch = branch; - } - - @Test + @TestTemplate public void testEmptyTable() { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); TableMetadata base = readMetadata(); - Assert.assertNull("Should not have a current snapshot", base.ref(branch)); + assertThat(base.ref(branch)).isNull(); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit( table, @@ -74,7 +69,7 @@ 
public void testEmptyTable() { .isInstanceOf(ValidationException.class) .hasMessage("Missing required files to delete: /path/to/data-a.parquet"); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit( table, @@ -90,11 +85,11 @@ public void testEmptyTable() { .hasMessage("Missing required files to delete: /path/to/data-a-deletes.parquet"); } - @Test + @TestTemplate public void testAddOnly() { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> apply( table.newRewrite().rewriteFiles(Sets.newSet(FILE_A), Collections.emptySet()), @@ -102,7 +97,7 @@ public void testAddOnly() { .isInstanceOf(ValidationException.class) .hasMessage("Missing required files to delete: /path/to/data-a.parquet"); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> apply( table @@ -117,7 +112,7 @@ public void testAddOnly() { .hasMessage( "Delete files to add must be empty because there's no delete file to be rewritten"); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> apply( table @@ -133,11 +128,11 @@ public void testAddOnly() { "Delete files to add must be empty because there's no delete file to be rewritten"); } - @Test + @TestTemplate public void testDeleteOnly() { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> apply( table.newRewrite().rewriteFiles(Collections.emptySet(), Sets.newSet(FILE_A)), @@ -145,7 +140,7 @@ public void testDeleteOnly() { .isInstanceOf(IllegalArgumentException.class) .hasMessage("Files to delete cannot be empty"); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> apply( table @@ -159,7 +154,7 @@ public void testDeleteOnly() { .isInstanceOf(IllegalArgumentException.class) .hasMessage("Files to delete cannot be empty"); - Assertions.assertThatThrownBy( 
+ assertThatThrownBy( () -> apply( table @@ -174,28 +169,23 @@ public void testDeleteOnly() { .hasMessage("Files to delete cannot be empty"); } - @Test + @TestTemplate public void testDeleteWithDuplicateEntriesInManifest() { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); commit( table, table.newAppend().appendFile(FILE_A).appendFile(FILE_A).appendFile(FILE_B), branch); TableMetadata base = readMetadata(); long baseSnapshotId = latestSnapshot(base, branch).snapshotId(); - Assert.assertEquals( - "Should create 1 manifest for initial write", - 1, - latestSnapshot(base, branch).allManifests(table.io()).size()); + assertThat(latestSnapshot(base, branch).allManifests(table.io())).hasSize(1); ManifestFile initialManifest = latestSnapshot(base, branch).allManifests(table.io()).get(0); Snapshot pending = apply(table.newRewrite().rewriteFiles(Sets.newSet(FILE_A), Sets.newSet(FILE_C)), branch); - Assert.assertEquals("Should contain 2 manifest", 2, pending.allManifests(table.io()).size()); - Assert.assertFalse( - "Should not contain manifest from initial write", - pending.allManifests(table.io()).contains(initialManifest)); + assertThat(pending.allManifests(table.io())).hasSize(2); + assertThat(pending.allManifests(table.io())).doesNotContain(initialManifest); long pendingId = pending.snapshotId(); @@ -209,30 +199,25 @@ public void testDeleteWithDuplicateEntriesInManifest() { statuses(DELETED, DELETED, EXISTING)); // We should only get the 3 manifests that this test is expected to add. 
- Assert.assertEquals("Only 3 manifests should exist", 3, listManifestFiles().size()); + assertThat(listManifestFiles()).hasSize(3); } - @Test + @TestTemplate public void testAddAndDelete() { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); commit(table, table.newAppend().appendFile(FILE_A).appendFile(FILE_B), branch); TableMetadata base = readMetadata(); long baseSnapshotId = latestSnapshot(base, branch).snapshotId(); - Assert.assertEquals( - "Should create 1 manifest for initial write", - 1, - latestSnapshot(table, branch).allManifests(table.io()).size()); + assertThat(latestSnapshot(table, branch).allManifests(table.io())).hasSize(1); ManifestFile initialManifest = latestSnapshot(table, branch).allManifests(table.io()).get(0); Snapshot pending = apply(table.newRewrite().rewriteFiles(Sets.newSet(FILE_A), Sets.newSet(FILE_C)), branch); - Assert.assertEquals("Should contain 2 manifest", 2, pending.allManifests(table.io()).size()); - Assert.assertFalse( - "Should not contain manifest from initial write", - pending.allManifests(table.io()).contains(initialManifest)); + assertThat(pending.allManifests(table.io())).hasSize(2); + assertThat(pending.allManifests(table.io())).doesNotContain(initialManifest); long pendingId = pending.snapshotId(); @@ -246,14 +231,15 @@ public void testAddAndDelete() { statuses(DELETED, EXISTING)); // We should only get the 3 manifests that this test is expected to add. - Assert.assertEquals("Only 3 manifests should exist", 3, listManifestFiles().size()); + assertThat(listManifestFiles()).hasSize(3); } - @Test + @TestTemplate public void testRewriteDataAndDeleteFiles() { - Assume.assumeTrue( - "Rewriting delete files is only supported in iceberg format v2. 
", formatVersion > 1); - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assumeThat(formatVersion) + .as("Rewriting delete files is only supported in iceberg format v2 or later") + .isGreaterThan(1); + assertThat(listManifestFiles()).isEmpty(); commit( table, @@ -269,8 +255,7 @@ public void testRewriteDataAndDeleteFiles() { TableMetadata base = readMetadata(); Snapshot baseSnap = latestSnapshot(base, branch); long baseSnapshotId = baseSnap.snapshotId(); - Assert.assertEquals( - "Should create 2 manifests for initial write", 2, baseSnap.allManifests(table.io()).size()); + assertThat(baseSnap.allManifests(table.io())).hasSize(2); List initialManifests = baseSnap.allManifests(table.io()); validateManifestEntries( @@ -299,10 +284,8 @@ public void testRewriteDataAndDeleteFiles() { ImmutableSet.of()), branch); - Assert.assertEquals("Should contain 3 manifest", 3, pending.allManifests(table.io()).size()); - Assert.assertFalse( - "Should not contain manifest from initial write", - pending.allManifests(table.io()).stream().anyMatch(initialManifests::contains)); + assertThat(pending.allManifests(table.io())).hasSize(3); + assertThat(pending.allManifests(table.io())).doesNotContainAnyElementsOf(initialManifests); long pendingId = pending.snapshotId(); validateManifestEntries( @@ -323,14 +306,15 @@ public void testRewriteDataAndDeleteFiles() { statuses(DELETED, EXISTING)); // We should only get the 5 manifests that this test is expected to add. - Assert.assertEquals("Only 5 manifests should exist", 5, listManifestFiles().size()); + assertThat(listManifestFiles()).hasSize(5); } - @Test + @TestTemplate public void testRewriteDataAndAssignOldSequenceNumber() { - Assume.assumeTrue( - "Sequence number is only supported in iceberg format v2. 
", formatVersion > 1); - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assumeThat(formatVersion) + .as("Sequence number is only supported in iceberg format v2 or later") + .isGreaterThan(1); + assertThat(listManifestFiles()).isEmpty(); commit( table, @@ -346,8 +330,7 @@ public void testRewriteDataAndAssignOldSequenceNumber() { TableMetadata base = readMetadata(); Snapshot baseSnap = latestSnapshot(base, branch); long baseSnapshotId = baseSnap.snapshotId(); - Assert.assertEquals( - "Should create 2 manifests for initial write", 2, baseSnap.allManifests(table.io()).size()); + assertThat(baseSnap.allManifests(table.io())).hasSize(2); List initialManifests = baseSnap.allManifests(table.io()); validateManifestEntries( @@ -373,24 +356,15 @@ public void testRewriteDataAndAssignOldSequenceNumber() { .rewriteFiles(ImmutableSet.of(FILE_A), ImmutableSet.of(FILE_D), oldSequenceNumber), branch); - Assert.assertEquals("Should contain 3 manifest", 3, pending.allManifests(table.io()).size()); - Assert.assertFalse( - "Should not contain data manifest from initial write", - pending.dataManifests(table.io()).stream().anyMatch(initialManifests::contains)); + assertThat(pending.allManifests(table.io())).hasSize(3); + assertThat(pending.dataManifests(table.io())).doesNotContainAnyElementsOf(initialManifests); long pendingId = pending.snapshotId(); ManifestFile newManifest = pending.allManifests(table.io()).get(0); validateManifestEntries(newManifest, ids(pendingId), files(FILE_D), statuses(ADDED)); - for (ManifestEntry entry : ManifestFiles.read(newManifest, FILE_IO).entries()) { - Assert.assertEquals( - "Should have old sequence number for manifest entries", - oldSequenceNumber, - (long) entry.dataSequenceNumber()); - } - Assert.assertEquals( - "Should use new sequence number for the manifest file", - oldSequenceNumber + 1, - newManifest.sequenceNumber()); + assertThat(ManifestFiles.read(newManifest, FILE_IO).entries()) + .allSatisfy(entry -> 
assertThat(entry.dataSequenceNumber()).isEqualTo(oldSequenceNumber)); + assertThat(newManifest.sequenceNumber()).isEqualTo(oldSequenceNumber + 1); validateManifestEntries( pending.allManifests(table.io()).get(1), @@ -407,10 +381,10 @@ public void testRewriteDataAndAssignOldSequenceNumber() { statuses(ADDED, ADDED)); // We should only get the 4 manifests that this test is expected to add. - Assert.assertEquals("Only 4 manifests should exist", 4, listManifestFiles().size()); + assertThat(listManifestFiles()).hasSize(4); } - @Test + @TestTemplate public void testFailure() { commit(table, table.newAppend().appendFile(FILE_A), branch); @@ -420,28 +394,29 @@ public void testFailure() { table.newRewrite().rewriteFiles(Sets.newSet(FILE_A), Sets.newSet(FILE_B)); Snapshot pending = apply(rewrite, branch); - Assert.assertEquals("Should produce 2 manifests", 2, pending.allManifests(table.io()).size()); + assertThat(pending.allManifests(table.io())).hasSize(2); ManifestFile manifest1 = pending.allManifests(table.io()).get(0); ManifestFile manifest2 = pending.allManifests(table.io()).get(1); validateManifestEntries(manifest1, ids(pending.snapshotId()), files(FILE_B), statuses(ADDED)); validateManifestEntries(manifest2, ids(pending.snapshotId()), files(FILE_A), statuses(DELETED)); - Assertions.assertThatThrownBy(() -> commit(table, rewrite, branch)) + assertThatThrownBy(() -> commit(table, rewrite, branch)) .isInstanceOf(CommitFailedException.class) .hasMessage("Injected failure"); - Assert.assertFalse("Should clean up new manifest", new File(manifest1.path()).exists()); - Assert.assertFalse("Should clean up new manifest", new File(manifest2.path()).exists()); + assertThat(new File(manifest1.path())).doesNotExist(); + assertThat(new File(manifest2.path())).doesNotExist(); // As commit failed all the manifests added with rewrite should be cleaned up - Assert.assertEquals("Only 1 manifest should exist", 1, listManifestFiles().size()); + assertThat(listManifestFiles()).hasSize(1); } 
- @Test + @TestTemplate public void testFailureWhenRewriteBothDataAndDeleteFiles() { - Assume.assumeTrue( - "Rewriting delete files is only supported in iceberg format v2. ", formatVersion > 1); + assumeThat(formatVersion) + .as("Rewriting delete files is only supported in iceberg format v2 or later") + .isGreaterThan(1); commit( table, @@ -468,7 +443,7 @@ public void testFailureWhenRewriteBothDataAndDeleteFiles() { ImmutableSet.of()); Snapshot pending = apply(rewrite, branch); - Assert.assertEquals("Should produce 3 manifests", 3, pending.allManifests(table.io()).size()); + assertThat(pending.allManifests(table.io())).hasSize(3); ManifestFile manifest1 = pending.allManifests(table.io()).get(0); ManifestFile manifest2 = pending.allManifests(table.io()).get(1); ManifestFile manifest3 = pending.allManifests(table.io()).get(2); @@ -493,19 +468,19 @@ public void testFailureWhenRewriteBothDataAndDeleteFiles() { files(FILE_A_DELETES, FILE_B_DELETES), statuses(DELETED, DELETED)); - Assertions.assertThatThrownBy(rewrite::commit) + assertThatThrownBy(rewrite::commit) .isInstanceOf(CommitFailedException.class) .hasMessage("Injected failure"); - Assert.assertFalse("Should clean up new manifest", new File(manifest1.path()).exists()); - Assert.assertFalse("Should clean up new manifest", new File(manifest2.path()).exists()); - Assert.assertFalse("Should clean up new manifest", new File(manifest3.path()).exists()); + assertThat(new File(manifest1.path())).doesNotExist(); + assertThat(new File(manifest2.path())).doesNotExist(); + assertThat(new File(manifest3.path())).doesNotExist(); // As commit failed all the manifests added with rewrite should be cleaned up - Assert.assertEquals("Only 2 manifest should exist", 2, listManifestFiles().size()); + assertThat(listManifestFiles()).hasSize(2); } - @Test + @TestTemplate public void testRecovery() { commit(table, table.newAppend().appendFile(FILE_A), branch); @@ -515,7 +490,7 @@ public void testRecovery() { 
table.newRewrite().rewriteFiles(Sets.newSet(FILE_A), Sets.newSet(FILE_B)); Snapshot pending = apply(rewrite, branch); - Assert.assertEquals("Should produce 2 manifests", 2, pending.allManifests(table.io()).size()); + assertThat(pending.allManifests(table.io())).hasSize(2); ManifestFile manifest1 = pending.allManifests(table.io()).get(0); ManifestFile manifest2 = pending.allManifests(table.io()).get(1); @@ -524,23 +499,21 @@ public void testRecovery() { commit(table, rewrite, branch); - Assert.assertTrue("Should reuse the manifest for appends", new File(manifest1.path()).exists()); - Assert.assertTrue( - "Should reuse the manifest with deletes", new File(manifest2.path()).exists()); + assertThat(new File(manifest1.path())).exists(); + assertThat(new File(manifest2.path())).exists(); TableMetadata metadata = readMetadata(); - Assert.assertTrue( - "Should commit the manifest for append", - latestSnapshot(metadata, branch).allManifests(table.io()).contains(manifest2)); + assertThat(latestSnapshot(metadata, branch).allManifests(table.io())).contains(manifest2); // 2 manifests added by rewrite and 1 original manifest should be found. - Assert.assertEquals("Only 3 manifests should exist", 3, listManifestFiles().size()); + assertThat(listManifestFiles()).hasSize(3); } - @Test + @TestTemplate public void testRecoverWhenRewriteBothDataAndDeleteFiles() { - Assume.assumeTrue( - "Rewriting delete files is only supported in iceberg format v2. 
", formatVersion > 1); + assumeThat(formatVersion) + .as("Rewriting delete files is only supported in iceberg format v2 or later") + .isGreaterThan(1); commit( table, @@ -567,7 +540,7 @@ public void testRecoverWhenRewriteBothDataAndDeleteFiles() { ImmutableSet.of()); Snapshot pending = apply(rewrite, branch); - Assert.assertEquals("Should produce 3 manifests", 3, pending.allManifests(table.io()).size()); + assertThat(pending.allManifests(table.io())).hasSize(3); ManifestFile manifest1 = pending.allManifests(table.io()).get(0); ManifestFile manifest2 = pending.allManifests(table.io()).get(1); ManifestFile manifest3 = pending.allManifests(table.io()).get(2); @@ -590,25 +563,24 @@ public void testRecoverWhenRewriteBothDataAndDeleteFiles() { commit(table, rewrite, branch); - Assert.assertTrue("Should reuse new manifest", new File(manifest1.path()).exists()); - Assert.assertTrue("Should reuse new manifest", new File(manifest2.path()).exists()); - Assert.assertTrue("Should reuse new manifest", new File(manifest3.path()).exists()); + assertThat(new File(manifest1.path())).exists(); + assertThat(new File(manifest2.path())).exists(); + assertThat(new File(manifest3.path())).exists(); TableMetadata metadata = readMetadata(); List committedManifests = Lists.newArrayList(manifest1, manifest2, manifest3); - Assert.assertEquals( - "Should committed the manifests", - latestSnapshot(metadata, branch).allManifests(table.io()), - committedManifests); + assertThat(latestSnapshot(metadata, branch).allManifests(table.io())) + .isEqualTo(committedManifests); // As commit success all the manifests added with rewrite should be available. - Assert.assertEquals("Only 5 manifest should exist", 5, listManifestFiles().size()); + assertThat(listManifestFiles()).hasSize(5); } - @Test + @TestTemplate public void testReplaceEqualityDeletesWithPositionDeletes() { - Assume.assumeTrue( - "Rewriting delete files is only supported in iceberg format v2. 
", formatVersion > 1); + assumeThat(formatVersion) + .as("Rewriting delete files is only supported in iceberg format v2 or later") + .isGreaterThan(1); commit(table, table.newRowDelta().addRows(FILE_A2).addDeletes(FILE_A2_DELETES), branch); @@ -624,7 +596,7 @@ public void testReplaceEqualityDeletesWithPositionDeletes() { ImmutableSet.of(), ImmutableSet.of(FILE_B_DELETES)); Snapshot pending = apply(rewrite, branch); - Assert.assertEquals("Should produce 3 manifests", 3, pending.allManifests(table.io()).size()); + assertThat(pending.allManifests(table.io())).hasSize(3); ManifestFile manifest1 = pending.allManifests(table.io()).get(0); ManifestFile manifest2 = pending.allManifests(table.io()).get(1); ManifestFile manifest3 = pending.allManifests(table.io()).get(2); @@ -649,25 +621,24 @@ public void testReplaceEqualityDeletesWithPositionDeletes() { commit(table, rewrite, branch); - Assert.assertTrue("Should reuse new manifest", new File(manifest1.path()).exists()); - Assert.assertTrue("Should reuse new manifest", new File(manifest2.path()).exists()); - Assert.assertTrue("Should reuse new manifest", new File(manifest3.path()).exists()); + assertThat(new File(manifest1.path())).exists(); + assertThat(new File(manifest2.path())).exists(); + assertThat(new File(manifest3.path())).exists(); metadata = readMetadata(); List committedManifests = Lists.newArrayList(manifest1, manifest2, manifest3); - Assert.assertEquals( - "Should committed the manifests", - latestSnapshot(metadata, branch).allManifests(table.io()), - committedManifests); + assertThat(latestSnapshot(metadata, branch).allManifests(table.io())) + .isEqualTo(committedManifests); // As commit success all the manifests added with rewrite should be available. 
- Assert.assertEquals("4 manifests should exist", 4, listManifestFiles().size()); + assertThat(listManifestFiles()).hasSize(4); } - @Test + @TestTemplate public void testRemoveAllDeletes() { - Assume.assumeTrue( - "Rewriting delete files is only supported in iceberg format v2. ", formatVersion > 1); + assumeThat(formatVersion) + .as("Rewriting delete files is only supported in iceberg format v2 or later") + .isGreaterThan(1); commit(table, table.newRowDelta().addRows(FILE_A).addDeletes(FILE_A_DELETES), branch); @@ -681,7 +652,7 @@ public void testRemoveAllDeletes() { ImmutableSet.of(), ImmutableSet.of()); Snapshot pending = apply(rewrite, branch); - Assert.assertEquals("Should produce 2 manifests", 2, pending.allManifests(table.io()).size()); + assertThat(pending.allManifests(table.io())).hasSize(2); ManifestFile manifest1 = pending.allManifests(table.io()).get(0); ManifestFile manifest2 = pending.allManifests(table.io()).get(1); @@ -697,32 +668,28 @@ public void testRemoveAllDeletes() { commit(table, rewrite, branch); - Assert.assertTrue("Should reuse the new manifest", new File(manifest1.path()).exists()); - Assert.assertTrue("Should reuse the new manifest", new File(manifest2.path()).exists()); + assertThat(new File(manifest1.path())).exists(); + assertThat(new File(manifest2.path())).exists(); TableMetadata metadata = readMetadata(); List committedManifests = Lists.newArrayList(manifest1, manifest2); - Assert.assertTrue( - "Should committed the manifests", - latestSnapshot(metadata, branch).allManifests(table.io()).containsAll(committedManifests)); + assertThat(latestSnapshot(metadata, branch).allManifests(table.io())) + .containsAll(committedManifests); // As commit success all the manifests added with rewrite should be available. 
- Assert.assertEquals("4 manifests should exist", 4, listManifestFiles().size()); + assertThat(listManifestFiles()).hasSize(4); } - @Test + @TestTemplate public void testDeleteNonExistentFile() { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); commit(table, table.newAppend().appendFile(FILE_A).appendFile(FILE_B), branch); TableMetadata base = readMetadata(); - Assert.assertEquals( - "Should create 1 manifest for initial write", - 1, - latestSnapshot(base, branch).allManifests(table.io()).size()); + assertThat(latestSnapshot(base, branch).allManifests(table.io())).hasSize(1); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit( table, @@ -731,26 +698,23 @@ public void testDeleteNonExistentFile() { .isInstanceOf(ValidationException.class) .hasMessage("Missing required files to delete: /path/to/data-c.parquet"); - Assert.assertEquals("Only 1 manifests should exist", 1, listManifestFiles().size()); + assertThat(listManifestFiles()).hasSize(1); } - @Test + @TestTemplate public void testAlreadyDeletedFile() { - Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + assertThat(listManifestFiles()).isEmpty(); commit(table, table.newAppend().appendFile(FILE_A), branch); TableMetadata base = readMetadata(); - Assert.assertEquals( - "Should create 1 manifest for initial write", - 1, - latestSnapshot(base, branch).allManifests(table.io()).size()); + assertThat(latestSnapshot(base, branch).allManifests(table.io())).hasSize(1); RewriteFiles rewrite = table.newRewrite(); Snapshot pending = apply(rewrite.rewriteFiles(Sets.newSet(FILE_A), Sets.newSet(FILE_B)), branch); - Assert.assertEquals("Should contain 2 manifest", 2, pending.allManifests(table.io()).size()); + assertThat(pending.allManifests(table.io())).hasSize(2); long pendingId = pending.snapshotId(); @@ -765,7 +729,7 @@ public void testAlreadyDeletedFile() { commit(table, rewrite, branch); - 
Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit( table, @@ -774,12 +738,14 @@ public void testAlreadyDeletedFile() { .isInstanceOf(ValidationException.class) .hasMessage("Missing required files to delete: /path/to/data-a.parquet"); - Assert.assertEquals("Only 3 manifests should exist", 3, listManifestFiles().size()); + assertThat(listManifestFiles()).hasSize(3); } - @Test + @TestTemplate public void testNewDeleteFile() { - Assume.assumeTrue("Delete files are only supported in v2", formatVersion > 1); + assumeThat(formatVersion) + .as("Rewriting delete files is only supported in iceberg format v2 or later") + .isGreaterThan(1); commit(table, table.newAppend().appendFile(FILE_A), branch); @@ -789,7 +755,7 @@ public void testNewDeleteFile() { long snapshotAfterDeletes = latestSnapshot(table, branch).snapshotId(); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> apply( table From 715140113a1d30dc213e677a7d65a5dbe51dde90 Mon Sep 17 00:00:00 2001 From: Daniel Weeks Date: Wed, 20 Mar 2024 12:45:58 -0700 Subject: [PATCH 19/23] Add issue template and docs for iceberg proposals (#9932) * Add issue template and docs for iceberg proposals * Add vote clarification * Update site/docs/contribute.md Co-authored-by: Renjie Liu * Update site/docs/contribute.md Co-authored-by: Renjie Liu * Update .github/ISSUE_TEMPLATE/iceberg_proposal.yml Co-authored-by: Eduard Tudenhoefner * Fix issue template link * Address comments --------- Co-authored-by: Renjie Liu Co-authored-by: Eduard Tudenhoefner --- .github/ISSUE_TEMPLATE/iceberg_proposal.yml | 50 +++++++++++++++++++++ site/docs/contribute.md | 36 +++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/iceberg_proposal.yml diff --git a/.github/ISSUE_TEMPLATE/iceberg_proposal.yml b/.github/ISSUE_TEMPLATE/iceberg_proposal.yml new file mode 100644 index 000000000000..af96b5399e15 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/iceberg_proposal.yml @@ -0,0 +1,50 @@ +# +# Licensed to 
the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +--- +name: Iceberg Improvement Proposal +description: Propose a Spec change or major feature +labels: ["proposal"] +body: + - type: markdown + attributes: + value: "Please see the documentation site for information on [contributing proposals](https://iceberg.apache.org/contribute/#apache-iceberg-improvement-proposals)" + - type: textarea + attributes: + label: Proposed Change + description: Please describe the proposal and elaborate on the use case and motivation + validations: + required: true + - type: input + attributes: + label: Proposal document + description: | + Link to the proposal document. Google Docs is the preferred format to allow for public + comment and sharing + - type: checkboxes + attributes: + label: Specifications + description: Which specifications are affected by this proposal? 
+ options: + - label: Table + - label: View + - label: REST + - label: Puffin + - label: Encryption + - label: Other \ No newline at end of file diff --git a/site/docs/contribute.md b/site/docs/contribute.md index e7fe35d14c06..2c9add8df28c 100644 --- a/site/docs/contribute.md +++ b/site/docs/contribute.md @@ -46,6 +46,42 @@ The Iceberg community prefers to receive contributions as [Github pull requests] * If a PR is posted for visibility and isn't necessarily ready for review or merging, be sure to convert the PR to a draft +## Apache Iceberg Improvement Proposals + +### What is an improvement proposal? + +An improvement proposal is a major change to Apache Iceberg that may require changes to an existing specification, creation +of a new specification, or significant additions/changes to any of the existing Iceberg implementations. Changes that are large in +scope need to be considered carefully and incorporate feedback from many community stakeholders. + +### What should a proposal include? + +1. A GitHub issue created using the `Iceberg Improvement Proposal` template +2. A document including the following: + * Motivation for the change + * Implementation proposal + * Breaking changes/incompatibilities + * Alternatives considered +3. A discussion thread initiated in the dev list with the Subject: '[DISCUSS] <proposal title>' + +### Who can submit a proposal? + +Anyone can submit a proposal, but be considerate and submit only if you plan on contributing to the implementation. + +### Where can I find current proposals? + +Current proposals are tracked in GitHub issues with the label [Proposal][iceberg-proposals] + +### How are proposals adopted? + +Once general consensus has been reached, a vote should be raised on the dev list. The vote follows the ASF +[code modification][apache-vote] model with three positive PMC votes required and no lazy consensus modifier. 
+The voting process should be held in good faith to reinforce and affirm the agreed upon proposal, not to +settle disagreements or to force a decision. + +[iceberg-proposals]: https://github.com/apache/iceberg/issues?q=is%3Aissue+is%3Aopen+label%3Aproposal+ +[apache-vote]: https://www.apache.org/foundation/voting.html#apache-voting-process + ## Building the Project Locally Iceberg is built using Gradle with Java 8 or Java 11. From 59ffa33e3d317ca2a3b2e0c3ea4a787ae19cc010 Mon Sep 17 00:00:00 2001 From: Tom Tanaka <43331405+tomtongue@users.noreply.github.com> Date: Fri, 22 Mar 2024 01:50:00 +0900 Subject: [PATCH 20/23] Core: Migrate tests to JUnit5 (#10014) --- .../apache/iceberg/TestReplacePartitions.java | 209 ++++------ .../iceberg/TestScanDataFileColumns.java | 47 +-- .../org/apache/iceberg/TestScanSummary.java | 133 +++--- .../iceberg/TestScansAndSchemaEvolution.java | 45 +- .../iceberg/TestSchemaAndMappingUpdate.java | 224 ++++------ .../java/org/apache/iceberg/TestSchemaID.java | 129 ++---- .../org/apache/iceberg/TestSchemaUpdate.java | 389 ++++++++---------- .../iceberg/TestTableUpdatePartitionSpec.java | 97 +++-- .../iceberg/TestTimestampPartitions.java | 32 +- .../iceberg/TestUpdatePartitionSpec.java | 272 +++++------- 10 files changed, 667 insertions(+), 910 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/TestReplacePartitions.java b/core/src/test/java/org/apache/iceberg/TestReplacePartitions.java index e657e7fc43ca..a25920a1d733 100644 --- a/core/src/test/java/org/apache/iceberg/TestReplacePartitions.java +++ b/core/src/test/java/org/apache/iceberg/TestReplacePartitions.java @@ -19,20 +19,22 @@ package org.apache.iceberg; import static org.apache.iceberg.util.SnapshotUtil.latestSnapshot; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.assertj.core.api.Assumptions.assumeThat; import java.io.File; import java.io.IOException; +import 
java.nio.file.Files; +import java.util.Arrays; +import java.util.List; import org.apache.iceberg.ManifestEntry.Status; import org.apache.iceberg.exceptions.ValidationException; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; -@RunWith(Parameterized.class) -public class TestReplacePartitions extends TableTestBase { +@ExtendWith(ParameterizedTestExtension.class) +public class TestReplacePartitions extends TestBase { static final DataFile FILE_E = DataFiles.builder(SPEC) @@ -73,24 +75,19 @@ public class TestReplacePartitions extends TableTestBase { .withRecordCount(1) .build(); - private final String branch; + @Parameter(index = 1) + private String branch; - @Parameterized.Parameters(name = "formatVersion = {0}, branch = {1}") - public static Object[] parameters() { - return new Object[][] { - new Object[] {1, "main"}, - new Object[] {1, "testBranch"}, - new Object[] {2, "main"}, - new Object[] {2, "testBranch"} - }; + @Parameters(name = "formatVersion = {0}, branch = {1}") + protected static List parameters() { + return Arrays.asList( + new Object[] {1, "main"}, + new Object[] {1, "testBranch"}, + new Object[] {2, "main"}, + new Object[] {2, "testBranch"}); } - public TestReplacePartitions(int formatVersion, String branch) { - super(formatVersion); - this.branch = branch; - } - - @Test + @TestTemplate public void testReplaceOnePartition() { commit(table, table.newFastAppend().appendFile(FILE_A).appendFile(FILE_B), branch); @@ -100,11 +97,8 @@ public void testReplaceOnePartition() { commit(table, table.newReplacePartitions().addFile(FILE_E), branch); long replaceId = latestSnapshot(readMetadata(), branch).snapshotId(); - Assert.assertNotEquals("Should create a new snapshot", baseId, replaceId); - Assert.assertEquals( - "Table should 
have 2 manifests", - 2, - latestSnapshot(table, branch).allManifests(table.io()).size()); + assertThat(replaceId).isNotEqualTo(baseId); + assertThat(latestSnapshot(table, branch).allManifests(table.io())).hasSize(2); // manifest is not merged because it is less than the minimum validateManifestEntries( @@ -120,7 +114,7 @@ public void testReplaceOnePartition() { statuses(Status.DELETED, Status.EXISTING)); } - @Test + @TestTemplate public void testReplaceAndMergeOnePartition() { // ensure the overwrite results in a merge table.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "1").commit(); @@ -133,11 +127,8 @@ public void testReplaceAndMergeOnePartition() { commit(table, table.newReplacePartitions().addFile(FILE_E), branch); long replaceId = latestSnapshot(table, branch).snapshotId(); - Assert.assertNotEquals("Should create a new snapshot", baseId, replaceId); - Assert.assertEquals( - "Table should have 1 manifest", - 1, - latestSnapshot(table, branch).allManifests(table.io()).size()); + assertThat(replaceId).isNotEqualTo(baseId); + assertThat(latestSnapshot(table, branch).allManifests(table.io())).hasSize(1); validateManifestEntries( latestSnapshot(table, branch).allManifests(table.io()).get(0), @@ -146,37 +137,31 @@ public void testReplaceAndMergeOnePartition() { statuses(Status.ADDED, Status.DELETED, Status.EXISTING)); } - @Test + @TestTemplate public void testReplaceWithUnpartitionedTable() throws IOException { - File tableDir = temp.newFolder(); - Assert.assertTrue(tableDir.delete()); + File tableDir = Files.createTempDirectory(temp, "junit").toFile(); + assertThat(tableDir.delete()).isTrue(); Table unpartitioned = TestTables.create( tableDir, "unpartitioned", SCHEMA, PartitionSpec.unpartitioned(), formatVersion); - Assert.assertEquals( - "Table version should be 0", 0, (long) TestTables.metadataVersion("unpartitioned")); + assertThat(TestTables.metadataVersion("unpartitioned")).isEqualTo(0); commit(table, 
unpartitioned.newAppend().appendFile(FILE_A), branch); // make sure the data was successfully added - Assert.assertEquals( - "Table version should be 1", 1, (long) TestTables.metadataVersion("unpartitioned")); + assertThat(TestTables.metadataVersion("unpartitioned")).isEqualTo(1); validateSnapshot( null, latestSnapshot(TestTables.readMetadata("unpartitioned"), branch), FILE_A); ReplacePartitions replacePartitions = unpartitioned.newReplacePartitions().addFile(FILE_B); commit(table, replacePartitions, branch); - Assert.assertEquals( - "Table version should be 2", 2, (long) TestTables.metadataVersion("unpartitioned")); + assertThat(TestTables.metadataVersion("unpartitioned")).isEqualTo(2); TableMetadata replaceMetadata = TestTables.readMetadata("unpartitioned"); long replaceId = latestSnapshot(replaceMetadata, branch).snapshotId(); - Assert.assertEquals( - "Table should have 2 manifests", - 2, - latestSnapshot(replaceMetadata, branch).allManifests(unpartitioned.io()).size()); + assertThat(latestSnapshot(replaceMetadata, branch).allManifests(unpartitioned.io())).hasSize(2); validateManifestEntries( latestSnapshot(replaceMetadata, branch).allManifests(unpartitioned.io()).get(0), @@ -191,10 +176,10 @@ public void testReplaceWithUnpartitionedTable() throws IOException { statuses(Status.DELETED)); } - @Test + @TestTemplate public void testReplaceAndMergeWithUnpartitionedTable() throws IOException { - File tableDir = temp.newFolder(); - Assert.assertTrue(tableDir.delete()); + File tableDir = Files.createTempDirectory(temp, "junit").toFile(); + assertThat(tableDir.delete()).isTrue(); Table unpartitioned = TestTables.create( @@ -203,30 +188,24 @@ public void testReplaceAndMergeWithUnpartitionedTable() throws IOException { // ensure the overwrite results in a merge unpartitioned.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "1").commit(); - Assert.assertEquals( - "Table version should be 1", 1, (long) TestTables.metadataVersion("unpartitioned")); + 
assertThat(TestTables.metadataVersion("unpartitioned")).isEqualTo(1); AppendFiles appendFiles = unpartitioned.newAppend().appendFile(FILE_A); commit(table, appendFiles, branch); // make sure the data was successfully added - Assert.assertEquals( - "Table version should be 2", 2, (long) TestTables.metadataVersion("unpartitioned")); + assertThat(TestTables.metadataVersion("unpartitioned")).isEqualTo(2); validateSnapshot( null, latestSnapshot(TestTables.readMetadata("unpartitioned"), branch), FILE_A); ReplacePartitions replacePartitions = unpartitioned.newReplacePartitions().addFile(FILE_B); commit(table, replacePartitions, branch); - Assert.assertEquals( - "Table version should be 3", 3, (long) TestTables.metadataVersion("unpartitioned")); + assertThat(TestTables.metadataVersion("unpartitioned")).isEqualTo(3); TableMetadata replaceMetadata = TestTables.readMetadata("unpartitioned"); long replaceId = latestSnapshot(replaceMetadata, branch).snapshotId(); - Assert.assertEquals( - "Table should have 1 manifest", - 1, - latestSnapshot(replaceMetadata, branch).allManifests(unpartitioned.io()).size()); + assertThat(latestSnapshot(replaceMetadata, branch).allManifests(unpartitioned.io())).hasSize(1); validateManifestEntries( latestSnapshot(replaceMetadata, branch).allManifests(unpartitioned.io()).get(0), @@ -235,7 +214,7 @@ public void testReplaceAndMergeWithUnpartitionedTable() throws IOException { statuses(Status.ADDED, Status.DELETED)); } - @Test + @TestTemplate public void testValidationFailure() { commit(table, table.newFastAppend().appendFile(FILE_A).appendFile(FILE_B), branch); @@ -245,17 +224,14 @@ public void testValidationFailure() { ReplacePartitions replace = table.newReplacePartitions().addFile(FILE_F).addFile(FILE_G).validateAppendOnly(); - Assertions.assertThatThrownBy(() -> commit(table, replace, branch)) + assertThatThrownBy(() -> commit(table, replace, branch)) .isInstanceOf(ValidationException.class) .hasMessageStartingWith("Cannot commit file that 
conflicts with existing partition"); - Assert.assertEquals( - "Should not create a new snapshot", - baseId, - latestSnapshot(readMetadata(), branch).snapshotId()); + assertThat(latestSnapshot(readMetadata(), branch).snapshotId()).isEqualTo(baseId); } - @Test + @TestTemplate public void testValidationSuccess() { commit(table, table.newFastAppend().appendFile(FILE_A).appendFile(FILE_B), branch); @@ -265,11 +241,8 @@ public void testValidationSuccess() { commit(table, table.newReplacePartitions().addFile(FILE_G).validateAppendOnly(), branch); long replaceId = latestSnapshot(readMetadata(), branch).snapshotId(); - Assert.assertNotEquals("Should create a new snapshot", baseId, replaceId); - Assert.assertEquals( - "Table should have 2 manifests", - 2, - latestSnapshot(table, branch).allManifests(table.io()).size()); + assertThat(replaceId).isNotEqualTo(baseId); + assertThat(latestSnapshot(table, branch).allManifests(table.io())).hasSize(2); // manifest is not merged because it is less than the minimum validateManifestEntries( @@ -285,7 +258,7 @@ public void testValidationSuccess() { statuses(Status.ADDED, Status.ADDED)); } - @Test + @TestTemplate public void testValidationNotInvoked() { commit(table, table.newFastAppend().appendFile(FILE_A), branch); @@ -309,10 +282,7 @@ public void testValidationNotInvoked() { branch); long replaceId = latestSnapshot(readMetadata(), branch).snapshotId(); - Assert.assertEquals( - "Table should have 2 manifest", - 2, - latestSnapshot(table, branch).allManifests(table.io()).size()); + assertThat(latestSnapshot(table, branch).allManifests(table.io())).hasSize(2); validateManifestEntries( latestSnapshot(table, branch).allManifests(table.io()).get(0), ids(replaceId, replaceId), @@ -325,13 +295,13 @@ public void testValidationNotInvoked() { statuses(Status.DELETED)); } - @Test + @TestTemplate public void testValidateWithDefaultSnapshotId() { commit(table, table.newReplacePartitions().addFile(FILE_A), branch); // Concurrent Replace Partitions 
should fail with ValidationException ReplacePartitions replace = table.newReplacePartitions(); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit( table, @@ -347,7 +317,7 @@ public void testValidateWithDefaultSnapshotId() { + "[data_bucket=0, data_bucket=1]: [/path/to/data-a.parquet]"); } - @Test + @TestTemplate public void testConcurrentReplaceConflict() { commit(table, table.newFastAppend().appendFile(FILE_A).appendFile(FILE_B), branch); @@ -357,7 +327,7 @@ public void testConcurrentReplaceConflict() { // Concurrent Replace Partitions should fail with ValidationException commit(table, table.newReplacePartitions().addFile(FILE_A), branch); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit( table, @@ -375,7 +345,7 @@ public void testConcurrentReplaceConflict() { + "[data_bucket=0, data_bucket=1]: [/path/to/data-a.parquet]"); } - @Test + @TestTemplate public void testConcurrentReplaceNoConflict() { commit(table, table.newFastAppend().appendFile(FILE_A), branch); @@ -397,10 +367,7 @@ public void testConcurrentReplaceNoConflict() { branch); long id3 = latestSnapshot(readMetadata(), branch).snapshotId(); - Assert.assertEquals( - "Table should have 2 manifests", - 2, - latestSnapshot(table, branch).allManifests(table.io()).size()); + assertThat(latestSnapshot(table, branch).allManifests(table.io())).hasSize(2); validateManifestEntries( latestSnapshot(table, branch).allManifests(table.io()).get(0), ids(id3), @@ -413,7 +380,7 @@ public void testConcurrentReplaceNoConflict() { statuses(Status.ADDED)); } - @Test + @TestTemplate public void testConcurrentReplaceConflictNonPartitioned() { Table unpartitioned = TestTables.create( @@ -426,7 +393,7 @@ public void testConcurrentReplaceConflictNonPartitioned() { // Concurrent ReplacePartitions should fail with ValidationException commit(table, unpartitioned.newReplacePartitions().addFile(FILE_UNPARTITIONED_A), branch); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit( table, @@ 
-443,7 +410,7 @@ public void testConcurrentReplaceConflictNonPartitioned() { + "[/path/to/data-unpartitioned-a.parquet]"); } - @Test + @TestTemplate public void testAppendReplaceConflict() { commit(table, table.newFastAppend().appendFile(FILE_A), branch); @@ -453,7 +420,7 @@ public void testAppendReplaceConflict() { // Concurrent Append and ReplacePartition should fail with ValidationException commit(table, table.newFastAppend().appendFile(FILE_B), branch); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit( table, @@ -471,7 +438,7 @@ public void testAppendReplaceConflict() { + "[data_bucket=0, data_bucket=1]: [/path/to/data-b.parquet]"); } - @Test + @TestTemplate public void testAppendReplaceNoConflict() { commit(table, table.newFastAppend().appendFile(FILE_A), branch); @@ -494,10 +461,7 @@ public void testAppendReplaceNoConflict() { branch); long id3 = latestSnapshot(readMetadata(), branch).snapshotId(); - Assert.assertEquals( - "Table should have 3 manifests", - 3, - latestSnapshot(table, branch).allManifests(table.io()).size()); + assertThat(latestSnapshot(table, branch).allManifests(table.io())).hasSize(3); validateManifestEntries( latestSnapshot(table, branch).allManifests(table.io()).get(0), ids(id3), @@ -515,7 +479,7 @@ public void testAppendReplaceNoConflict() { statuses(Status.DELETED)); } - @Test + @TestTemplate public void testAppendReplaceConflictNonPartitioned() { Table unpartitioned = TestTables.create( @@ -528,7 +492,7 @@ public void testAppendReplaceConflictNonPartitioned() { // Concurrent Append and ReplacePartitions should fail with ValidationException commit(table, unpartitioned.newAppend().appendFile(FILE_UNPARTITIONED_A), branch); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit( table, @@ -545,9 +509,9 @@ public void testAppendReplaceConflictNonPartitioned() { + "[/path/to/data-unpartitioned-a.parquet]"); } - @Test + @TestTemplate public void testDeleteReplaceConflict() { - Assume.assumeTrue(formatVersion 
== 2); + assumeThat(formatVersion).isEqualTo(2); commit(table, table.newFastAppend().appendFile(FILE_A), branch); TableMetadata base = readMetadata(); @@ -557,7 +521,7 @@ public void testDeleteReplaceConflict() { commit( table, table.newRowDelta().addDeletes(FILE_A_DELETES).validateFromSnapshot(baseId), branch); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit( table, @@ -574,9 +538,9 @@ public void testDeleteReplaceConflict() { + "[data_bucket=0]: [/path/to/data-a-deletes.parquet]"); } - @Test + @TestTemplate public void testDeleteReplaceConflictNonPartitioned() { - Assume.assumeTrue(formatVersion == 2); + assumeThat(formatVersion).isEqualTo(2); Table unpartitioned = TestTables.create( @@ -589,7 +553,7 @@ public void testDeleteReplaceConflictNonPartitioned() { // Concurrent Delete and ReplacePartitions should fail with ValidationException commit(table, unpartitioned.newRowDelta().addDeletes(FILE_UNPARTITIONED_A_DELETES), branch); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit( table, @@ -606,9 +570,9 @@ public void testDeleteReplaceConflictNonPartitioned() { + "[/path/to/data-unpartitioned-a-deletes.parquet]"); } - @Test + @TestTemplate public void testDeleteReplaceNoConflict() { - Assume.assumeTrue(formatVersion == 2); + assumeThat(formatVersion).isEqualTo(2); commit(table, table.newFastAppend().appendFile(FILE_A), branch); long id1 = latestSnapshot(readMetadata(), branch).snapshotId(); @@ -638,10 +602,7 @@ public void testDeleteReplaceNoConflict() { long id3 = latestSnapshot(readMetadata(), branch).snapshotId(); - Assert.assertEquals( - "Table should have 3 manifest", - 3, - latestSnapshot(table, branch).allManifests(table.io()).size()); + assertThat(latestSnapshot(table, branch).allManifests(table.io())).hasSize(3); validateManifestEntries( latestSnapshot(table, branch).allManifests(table.io()).get(0), ids(id3), @@ -661,9 +622,9 @@ public void testDeleteReplaceNoConflict() { statuses(Status.ADDED)); } - @Test + 
@TestTemplate public void testOverwriteReplaceConflict() { - Assume.assumeTrue(formatVersion == 2); + assumeThat(formatVersion).isEqualTo(2); commit(table, table.newFastAppend().appendFile(FILE_A), branch); TableMetadata base = readMetadata(); @@ -672,7 +633,7 @@ public void testOverwriteReplaceConflict() { // Concurrent Overwrite and ReplacePartition should fail with ValidationException commit(table, table.newOverwrite().deleteFile(FILE_A), branch); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit( table, @@ -689,9 +650,9 @@ public void testOverwriteReplaceConflict() { + "[data_bucket=0]: [/path/to/data-a.parquet]"); } - @Test + @TestTemplate public void testOverwriteReplaceNoConflict() { - Assume.assumeTrue(formatVersion == 2); + assumeThat(formatVersion).isEqualTo(2); commit(table, table.newFastAppend().appendFile(FILE_A).appendFile(FILE_B), branch); TableMetadata base = readMetadata(); @@ -713,10 +674,7 @@ public void testOverwriteReplaceNoConflict() { long finalId = latestSnapshot(readMetadata(), branch).snapshotId(); - Assert.assertEquals( - "Table should have 2 manifest", - 2, - latestSnapshot(table, branch).allManifests(table.io()).size()); + assertThat(latestSnapshot(table, branch).allManifests(table.io())).hasSize(2); validateManifestEntries( latestSnapshot(table, branch).allManifests(table.io()).get(0), ids(finalId), @@ -729,9 +687,9 @@ public void testOverwriteReplaceNoConflict() { statuses(Status.DELETED)); } - @Test + @TestTemplate public void testOverwriteReplaceConflictNonPartitioned() { - Assume.assumeTrue(formatVersion == 2); + assumeThat(formatVersion).isEqualTo(2); Table unpartitioned = TestTables.create( @@ -745,7 +703,7 @@ public void testOverwriteReplaceConflictNonPartitioned() { // Concurrent Overwrite and ReplacePartitions should fail with ValidationException commit(table, unpartitioned.newOverwrite().deleteFile(FILE_UNPARTITIONED_A), branch); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> commit( table, @@ 
-762,7 +720,7 @@ public void testOverwriteReplaceConflictNonPartitioned() { + "[/path/to/data-unpartitioned-a.parquet]"); } - @Test + @TestTemplate public void testValidateOnlyDeletes() { commit(table, table.newAppend().appendFile(FILE_A), branch); long baseId = latestSnapshot(readMetadata(), branch).snapshotId(); @@ -780,10 +738,7 @@ public void testValidateOnlyDeletes() { branch); long finalId = latestSnapshot(readMetadata(), branch).snapshotId(); - Assert.assertEquals( - "Table should have 3 manifest", - 3, - latestSnapshot(table, branch).allManifests(table.io()).size()); + assertThat(latestSnapshot(table, branch).allManifests(table.io())).hasSize(3); validateManifestEntries( latestSnapshot(table, branch).allManifests(table.io()).get(0), ids(finalId), @@ -801,7 +756,7 @@ public void testValidateOnlyDeletes() { statuses(Status.ADDED)); } - @Test + @TestTemplate public void testEmptyPartitionPathWithUnpartitionedTable() { DataFiles.builder(PartitionSpec.unpartitioned()).withPartitionPath(""); } diff --git a/core/src/test/java/org/apache/iceberg/TestScanDataFileColumns.java b/core/src/test/java/org/apache/iceberg/TestScanDataFileColumns.java index 8baec3184fd7..408d0047e0af 100644 --- a/core/src/test/java/org/apache/iceberg/TestScanDataFileColumns.java +++ b/core/src/test/java/org/apache/iceberg/TestScanDataFileColumns.java @@ -20,21 +20,22 @@ import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.file.Files; +import java.nio.file.Path; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.hadoop.HadoopTables; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import 
org.apache.iceberg.types.Types; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestScanDataFileColumns { private static final Schema SCHEMA = @@ -44,15 +45,15 @@ public class TestScanDataFileColumns { private static final Configuration CONF = new Configuration(); private static final Tables TABLES = new HadoopTables(CONF); - @Rule public final TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; private String tableLocation = null; private Table table = null; - @Before + @BeforeEach public void createTables() throws IOException { - File location = temp.newFolder("shared"); - Assert.assertTrue(location.delete()); + File location = Files.createTempDirectory(temp, "junit").toFile(); + assertThat(location.delete()).isTrue(); this.tableLocation = location.toString(); this.table = TABLES.create( @@ -115,10 +116,10 @@ public void createTables() throws IOException { public void testColumnStatsIgnored() { // stats columns should be suppressed by default for (FileScanTask fileTask : table.newScan().planFiles()) { - Assert.assertNull(fileTask.file().valueCounts()); - Assert.assertNull(fileTask.file().nullValueCounts()); - Assert.assertNull(fileTask.file().lowerBounds()); - Assert.assertNull(fileTask.file().upperBounds()); + assertThat(fileTask.file().valueCounts()).isNull(); + assertThat(fileTask.file().nullValueCounts()).isNull(); + assertThat(fileTask.file().lowerBounds()).isNull(); + assertThat(fileTask.file().upperBounds()).isNull(); } } @@ -126,11 +127,11 @@ public void testColumnStatsIgnored() { public void testColumnStatsLoading() { // stats columns should be suppressed by default for (FileScanTask fileTask : table.newScan().includeColumnStats().planFiles()) { - Assert.assertEquals(2, fileTask.file().valueCounts().size()); 
- Assert.assertEquals(2, fileTask.file().nullValueCounts().size()); - Assert.assertEquals(2, fileTask.file().lowerBounds().size()); - Assert.assertEquals(2, fileTask.file().upperBounds().size()); - Assert.assertEquals(2, fileTask.file().columnSizes().size()); + assertThat(fileTask.file().valueCounts()).hasSize(2); + assertThat(fileTask.file().nullValueCounts()).hasSize(2); + assertThat(fileTask.file().lowerBounds()).hasSize(2); + assertThat(fileTask.file().upperBounds()).hasSize(2); + assertThat(fileTask.file().columnSizes()).hasSize(2); } } @@ -139,11 +140,11 @@ public void testColumnStatsPartial() { // stats columns should be suppressed by default for (FileScanTask fileTask : table.newScan().includeColumnStats(ImmutableSet.of("id")).planFiles()) { - Assert.assertEquals(1, fileTask.file().valueCounts().size()); - Assert.assertEquals(1, fileTask.file().nullValueCounts().size()); - Assert.assertEquals(1, fileTask.file().lowerBounds().size()); - Assert.assertEquals(1, fileTask.file().upperBounds().size()); - Assert.assertEquals(1, fileTask.file().columnSizes().size()); + assertThat(fileTask.file().valueCounts()).hasSize(1); + assertThat(fileTask.file().nullValueCounts()).hasSize(1); + assertThat(fileTask.file().lowerBounds()).hasSize(1); + assertThat(fileTask.file().upperBounds()).hasSize(1); + assertThat(fileTask.file().columnSizes()).hasSize(1); } } diff --git a/core/src/test/java/org/apache/iceberg/TestScanSummary.java b/core/src/test/java/org/apache/iceberg/TestScanSummary.java index 59986016614a..b326274842a3 100644 --- a/core/src/test/java/org/apache/iceberg/TestScanSummary.java +++ b/core/src/test/java/org/apache/iceberg/TestScanSummary.java @@ -25,28 +25,24 @@ import static org.apache.iceberg.expressions.Expressions.greaterThanOrEqual; import static org.apache.iceberg.expressions.Expressions.lessThan; import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual; +import static org.assertj.core.api.Assertions.assertThat; +import static 
org.assertj.core.api.Assertions.assertThatThrownBy; +import java.util.Arrays; +import java.util.List; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.util.Pair; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class TestScanSummary extends TableTestBase { - @Parameterized.Parameters(name = "formatVersion = {0}") - public static Object[] parameters() { - return new Object[] {1, 2}; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(ParameterizedTestExtension.class) +public class TestScanSummary extends TestBase { + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(1, 2); } - public TestScanSummary(int formatVersion) { - super(formatVersion); - } - - @Test + @TestTemplate public void testSnapshotTimeRangeValidation() { long t0 = System.currentTimeMillis(); @@ -76,12 +72,8 @@ public void testSnapshotTimeRangeValidation() { // expire the first snapshot table.expireSnapshots().expireOlderThan(t1).commit(); - Assert.assertEquals( - "Should have one snapshot", 1, Lists.newArrayList(table.snapshots()).size()); - Assert.assertEquals( - "Snapshot should be the second snapshot created", - secondSnapshotId, - table.currentSnapshot().snapshotId()); + assertThat(table.snapshots()).hasSize(1); + assertThat(table.currentSnapshot().snapshotId()).isEqualTo(secondSnapshotId); // this should include the first snapshot, but it was removed from the dataset TableScan scan = @@ -90,63 +82,56 @@ public void testSnapshotTimeRangeValidation() { .filter(greaterThanOrEqual("dateCreated", t0)) .filter(lessThan("dateCreated", t2)); - Assertions.assertThatThrownBy(() -> new 
ScanSummary.Builder(scan).build()) + assertThatThrownBy(() -> new ScanSummary.Builder(scan).build()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot satisfy time filters: time range may include expired snapshots"); } - @Test + @TestTemplate public void testTimestampRanges() { long lower = 1542750188523L; long upper = 1542750695131L; - Assert.assertEquals( - "Should use inclusive bound", - Pair.of(Long.MIN_VALUE, upper), - timestampRange(ImmutableList.of(lessThanOrEqual("ts_ms", upper)))); - - Assert.assertEquals( - "Should use lower value for upper bound", - Pair.of(Long.MIN_VALUE, upper), - timestampRange( - ImmutableList.of( - lessThanOrEqual("ts_ms", upper + 918234), lessThanOrEqual("ts_ms", upper)))); - - Assert.assertEquals( - "Should make upper bound inclusive", - Pair.of(Long.MIN_VALUE, upper - 1), - timestampRange(ImmutableList.of(lessThan("ts_ms", upper)))); - - Assert.assertEquals( - "Should use inclusive bound", - Pair.of(lower, Long.MAX_VALUE), - timestampRange(ImmutableList.of(greaterThanOrEqual("ts_ms", lower)))); - - Assert.assertEquals( - "Should use upper value for lower bound", - Pair.of(lower, Long.MAX_VALUE), - timestampRange( - ImmutableList.of( - greaterThanOrEqual("ts_ms", lower - 918234), greaterThanOrEqual("ts_ms", lower)))); - - Assert.assertEquals( - "Should make lower bound inclusive", - Pair.of(lower + 1, Long.MAX_VALUE), - timestampRange(ImmutableList.of(greaterThan("ts_ms", lower)))); - - Assert.assertEquals( - "Should set both bounds for equals", - Pair.of(lower, lower), - timestampRange(ImmutableList.of(equal("ts_ms", lower)))); - - Assert.assertEquals( - "Should set both bounds", - Pair.of(lower, upper - 1), - timestampRange( - ImmutableList.of(greaterThanOrEqual("ts_ms", lower), lessThan("ts_ms", upper)))); + assertThat(timestampRange(ImmutableList.of(lessThanOrEqual("ts_ms", upper)))) + .isEqualTo(Pair.of(Long.MIN_VALUE, upper)); + + assertThat( + timestampRange( + ImmutableList.of( + lessThanOrEqual("ts_ms", 
upper + 918234), lessThanOrEqual("ts_ms", upper)))) + .as("Should use lower value for upper bound") + .isEqualTo(Pair.of(Long.MIN_VALUE, upper)); + + assertThat(timestampRange(ImmutableList.of(lessThan("ts_ms", upper)))) + .as("Should make upper bound inclusive") + .isEqualTo(Pair.of(Long.MIN_VALUE, upper - 1)); + + assertThat(timestampRange(ImmutableList.of(greaterThanOrEqual("ts_ms", lower)))) + .isEqualTo(Pair.of(lower, Long.MAX_VALUE)); + + assertThat( + timestampRange( + ImmutableList.of( + greaterThanOrEqual("ts_ms", lower - 918234), + greaterThanOrEqual("ts_ms", lower)))) + .as("Should use upper value for lower bound") + .isEqualTo(Pair.of(lower, Long.MAX_VALUE)); + + assertThat(timestampRange(ImmutableList.of(greaterThan("ts_ms", lower)))) + .as("Should make lower bound inclusive") + .isEqualTo(Pair.of(lower + 1, Long.MAX_VALUE)); + + assertThat(timestampRange(ImmutableList.of(equal("ts_ms", lower)))) + .isEqualTo(Pair.of(lower, lower)); + + assertThat( + timestampRange( + ImmutableList.of(greaterThanOrEqual("ts_ms", lower), lessThan("ts_ms", upper)))) + .as("Should set both bounds and make upper bound inclusive") + .isEqualTo(Pair.of(lower, upper - 1)); // >= lower and < lower is an empty range - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> timestampRange( ImmutableList.of(greaterThanOrEqual("ts_ms", lower), lessThan("ts_ms", lower)))) @@ -154,11 +139,11 @@ public void testTimestampRanges() { .hasMessageStartingWith("No timestamps can match filters"); } - @Test + @TestTemplate public void testToMillis() { long millis = 1542750947417L; - Assert.assertEquals(1542750947000L, toMillis(millis / 1000)); - Assert.assertEquals(1542750947417L, toMillis(millis)); - Assert.assertEquals(1542750947417L, toMillis(millis * 1000 + 918)); + assertThat(toMillis(millis / 1000)).isEqualTo(1542750947000L); + assertThat(toMillis(millis)).isEqualTo(1542750947417L); + assertThat(toMillis(millis * 1000 + 918)).isEqualTo(1542750947417L); } } diff --git 
a/core/src/test/java/org/apache/iceberg/TestScansAndSchemaEvolution.java b/core/src/test/java/org/apache/iceberg/TestScansAndSchemaEvolution.java index 5b7886fee20b..0d5b1bc7066b 100644 --- a/core/src/test/java/org/apache/iceberg/TestScansAndSchemaEvolution.java +++ b/core/src/test/java/org/apache/iceberg/TestScansAndSchemaEvolution.java @@ -19,9 +19,13 @@ package org.apache.iceberg; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; import java.io.File; import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; import java.util.List; import java.util.UUID; import org.apache.avro.generic.GenericData; @@ -33,15 +37,12 @@ import org.apache.iceberg.io.OutputFile; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Types; -import org.junit.After; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; + +@ExtendWith(ParameterizedTestExtension.class) public class TestScansAndSchemaEvolution { private static final Schema SCHEMA = new Schema( @@ -52,18 +53,14 @@ public class TestScansAndSchemaEvolution { private static final PartitionSpec SPEC = PartitionSpec.builderFor(SCHEMA).identity("part").build(); - @Parameterized.Parameters(name = "formatVersion = {0}") - public static Object[] parameters() { - return new Object[] {1, 2}; + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(1, 2); } - public final int formatVersion; - - public TestScansAndSchemaEvolution(int formatVersion) { - this.formatVersion = 
formatVersion; - } + @Parameter private int formatVersion; - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; private DataFile createDataFile(String partValue) throws IOException { List expected = RandomAvroData.generate(SCHEMA, 100, 0L); @@ -87,15 +84,15 @@ private DataFile createDataFile(String partValue) throws IOException { .build(); } - @After + @AfterEach public void cleanupTables() { TestTables.clearTables(); } - @Test + @TestTemplate public void testPartitionSourceRename() throws IOException { - File location = temp.newFolder(); - Assert.assertTrue(location.delete()); // should be created by table create + File location = Files.createTempDirectory(temp, "junit").toFile(); + assertThat(location.delete()).isTrue(); // should be created by table create Table table = TestTables.create(location, "test", SCHEMA, SPEC, formatVersion); @@ -107,13 +104,13 @@ public void testPartitionSourceRename() throws IOException { List tasks = Lists.newArrayList(table.newScan().filter(Expressions.equal("part", "one")).planFiles()); - Assert.assertEquals("Should produce 1 matching file task", 1, tasks.size()); + assertThat(tasks).hasSize(1); table.updateSchema().renameColumn("part", "p").commit(); // plan the scan using the new name in a filter tasks = Lists.newArrayList(table.newScan().filter(Expressions.equal("p", "one")).planFiles()); - Assert.assertEquals("Should produce 1 matching file task", 1, tasks.size()); + assertThat(tasks).hasSize(1); } } diff --git a/core/src/test/java/org/apache/iceberg/TestSchemaAndMappingUpdate.java b/core/src/test/java/org/apache/iceberg/TestSchemaAndMappingUpdate.java index 5b8aff686fbc..3697678d63f0 100644 --- a/core/src/test/java/org/apache/iceberg/TestSchemaAndMappingUpdate.java +++ b/core/src/test/java/org/apache/iceberg/TestSchemaAndMappingUpdate.java @@ -19,7 +19,11 @@ package org.apache.iceberg; import static org.apache.iceberg.TableProperties.PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX; +import 
static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import java.util.Arrays; +import java.util.List; import java.util.Objects; import java.util.Set; import org.apache.iceberg.exceptions.ValidationException; @@ -32,24 +36,17 @@ import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.types.Types; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class TestSchemaAndMappingUpdate extends TableTestBase { - @Parameterized.Parameters(name = "formatVersion = {0}") - public static Object[] parameters() { - return new Object[] {1, 2}; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(ParameterizedTestExtension.class) +public class TestSchemaAndMappingUpdate extends TestBase { + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(1, 2); } - public TestSchemaAndMappingUpdate(int formatVersion) { - super(formatVersion); - } - - @Test + @TestTemplate public void testAddPrimitiveColumn() { NameMapping mapping = MappingUtil.create(table.schema()); String mappingJson = NameMappingParser.toJson(mapping); @@ -64,15 +61,12 @@ public void testAddPrimitiveColumn() { validateUnchanged(mapping, updated); MappedField newMapping = updated.find("count"); - Assert.assertNotNull("Mapping for new column should be added", newMapping); - Assert.assertEquals( - "Mapping should use the assigned field ID", - (Integer) table.schema().findField("count").fieldId(), - updated.find("count").id()); - Assert.assertNull("Should not contain a nested mapping", updated.find("count").nestedMapping()); + assertThat(newMapping).isNotNull(); + 
assertThat(updated.find("count").id()).isEqualTo(table.schema().findField("count").fieldId()); + assertThat(updated.find("count").nestedMapping()).isNull(); } - @Test + @TestTemplate public void testAddStructColumn() { NameMapping mapping = MappingUtil.create(table.schema()); String mappingJson = NameMappingParser.toJson(mapping); @@ -94,31 +88,22 @@ public void testAddStructColumn() { validateUnchanged(mapping, updated); MappedField newMapping = updated.find("location"); - Assert.assertNotNull("Mapping for new column should be added", newMapping); - - Assert.assertEquals( - "Mapping should use the assigned field ID", - (Integer) table.schema().findField("location").fieldId(), - updated.find("location").id()); - Assert.assertNotNull( - "Should contain a nested mapping", updated.find("location").nestedMapping()); - - Assert.assertEquals( - "Mapping should use the assigned field ID", - (Integer) table.schema().findField("location.lat").fieldId(), - updated.find("location.lat").id()); - Assert.assertNull( - "Should not contain a nested mapping", updated.find("location.lat").nestedMapping()); - - Assert.assertEquals( - "Mapping should use the assigned field ID", - (Integer) table.schema().findField("location.long").fieldId(), - updated.find("location.long").id()); - Assert.assertNull( - "Should not contain a nested mapping", updated.find("location.long").nestedMapping()); + assertThat(newMapping).isNotNull(); + + assertThat(updated.find("location").id()) + .isEqualTo(table.schema().findField("location").fieldId()); + assertThat(updated.find("location").nestedMapping()).isNotNull(); + + assertThat(updated.find("location.lat").id()) + .isEqualTo(table.schema().findField("location.lat").fieldId()); + assertThat(updated.find("location.lat").nestedMapping()).isNull(); + + assertThat(updated.find("location.long").id()) + .isEqualTo(table.schema().findField("location.long").fieldId()); + assertThat(updated.find("location.long").nestedMapping()).isNull(); } - @Test + 
@TestTemplate public void testRenameColumn() { NameMapping mapping = MappingUtil.create(table.schema()); String mappingJson = NameMappingParser.toJson(mapping); @@ -137,14 +122,12 @@ public void testRenameColumn() { updated); MappedField updatedMapping = updated.find(idColumnId); - Assert.assertNotNull("Mapping for id column should exist", updatedMapping); - Assert.assertEquals( - "Should add the new column name to the existing mapping", - MappedField.of(idColumnId, ImmutableList.of("id", "object_id")), - updatedMapping); + assertThat(updatedMapping) + .isNotNull() + .isEqualTo(MappedField.of(idColumnId, ImmutableList.of("id", "object_id"))); } - @Test + @TestTemplate public void testDeleteColumn() { NameMapping mapping = MappingUtil.create(table.schema()); String mappingJson = NameMappingParser.toJson(mapping); @@ -160,7 +143,7 @@ public void testDeleteColumn() { validateUnchanged(mapping, updated); } - @Test + @TestTemplate public void testModificationWithMetricsMetrics() { NameMapping mapping = MappingUtil.create(table.schema()); String mappingJson = NameMappingParser.toJson(mapping); @@ -171,7 +154,7 @@ public void testModificationWithMetricsMetrics() { .set("write.metadata.metrics.column.id", "full") .commit(); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> table .updateProperties() @@ -184,25 +167,19 @@ public void testModificationWithMetricsMetrics() { // Re-naming a column with metrics succeeds; table.updateSchema().renameColumn("id", "bloop").commit(); - Assert.assertNotNull( - "Make sure the metrics config now has bloop", - table.properties().get(TableProperties.METRICS_MODE_COLUMN_CONF_PREFIX + "bloop")); - Assert.assertNull( - "Make sure the metrics config no longer has id", - table.properties().get(TableProperties.METRICS_MODE_COLUMN_CONF_PREFIX + "id")); + assertThat(table.properties()) + .containsEntry(TableProperties.METRICS_MODE_COLUMN_CONF_PREFIX + "bloop", "full") + .doesNotContainKey(TableProperties.METRICS_MODE_COLUMN_CONF_PREFIX 
+ "id"); // Deleting a column with metrics succeeds table.updateSchema().deleteColumn("bloop").commit(); // Make sure no more reference to bloop in the metrics config - Assert.assertNull( - "Make sure the metrics config no longer has id", - table.properties().get(TableProperties.METRICS_MODE_COLUMN_CONF_PREFIX + "id")); - Assert.assertNull( - "Make sure the metrics config no longer has bloop", - table.properties().get(TableProperties.METRICS_MODE_COLUMN_CONF_PREFIX + "bloop")); + assertThat(table.properties()) + .doesNotContainKey(TableProperties.METRICS_MODE_COLUMN_CONF_PREFIX + "id") + .doesNotContainKey(TableProperties.METRICS_MODE_COLUMN_CONF_PREFIX + "bloop"); } - @Test + @TestTemplate public void testModificationWithParquetBloomConfig() { table .updateProperties() @@ -210,20 +187,17 @@ public void testModificationWithParquetBloomConfig() { .commit(); table.updateSchema().renameColumn("id", "ID").commit(); - Assert.assertNotNull( - "Parquet bloom config for new column name ID should exists", - table.properties().get(PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX + "ID")); - Assert.assertNull( - "Parquet bloom config for old column name id should not exists", - table.properties().get(PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX + "id")); + assertThat(table.properties()) + .containsEntry(PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX + "ID", "true") + .doesNotContainKey(PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX + "id"); table.updateSchema().deleteColumn("ID").commit(); - Assert.assertNull( - "Parquet bloom config for dropped column name ID should not exists", - table.properties().get(PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX + "ID")); + // The bloom filter property of the dropped column must be gone; assert on the key itself + assertThat(table.properties()) + .doesNotContainKey(PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX + "ID"); } - @Test + @TestTemplate public void testDeleteAndAddColumnReassign() { NameMapping mapping = MappingUtil.create(table.schema()); String mappingJson = NameMappingParser.toJson(mapping); @@ -248,20 +222,18 @@ 
public void testDeleteAndAddColumnReassign() { updated); MappedField newMapping = updated.find("id"); - Assert.assertNotNull("Mapping for id column should exist", newMapping); - Assert.assertEquals( - "Mapping should use the new field ID", (Integer) idColumnId, newMapping.id()); - Assert.assertNull("Should not contain a nested mapping", newMapping.nestedMapping()); + assertThat(newMapping).isNotNull(); + assertThat(newMapping.id()).isEqualTo(idColumnId); + assertThat(newMapping.nestedMapping()).isNull(); MappedField updatedMapping = updated.find(startIdColumnId); - Assert.assertNotNull("Mapping for original id column should exist", updatedMapping); - Assert.assertEquals( - "Mapping should use the original field ID", (Integer) startIdColumnId, updatedMapping.id()); - Assert.assertFalse("Should not use id as a name", updatedMapping.names().contains("id")); - Assert.assertNull("Should not contain a nested mapping", updatedMapping.nestedMapping()); + assertThat(updatedMapping).isNotNull(); + assertThat(updatedMapping.id()).isEqualTo(startIdColumnId); + assertThat(updatedMapping.names()).doesNotContain("id"); + assertThat(updatedMapping.nestedMapping()).isNull(); } - @Test + @TestTemplate public void testDeleteAndRenameColumnReassign() { NameMapping mapping = MappingUtil.create(table.schema()); String mappingJson = NameMappingParser.toJson(mapping); @@ -286,22 +258,19 @@ public void testDeleteAndRenameColumnReassign() { updated); MappedField newMapping = updated.find("id"); - Assert.assertNotNull("Mapping for id column should exist", newMapping); - Assert.assertEquals( - "Mapping should use the new field ID", (Integer) idColumnId, newMapping.id()); - Assert.assertEquals( - "Should have both names", Sets.newHashSet("id", "data"), newMapping.names()); - Assert.assertNull("Should not contain a nested mapping", newMapping.nestedMapping()); + assertThat(newMapping).isNotNull(); + assertThat(newMapping.id()).isEqualTo(idColumnId); + 
assertThat(newMapping.names()).isEqualTo(Sets.newHashSet("id", "data")); + assertThat(newMapping.nestedMapping()).isNull(); MappedField updatedMapping = updated.find(startIdColumnId); - Assert.assertNotNull("Mapping for original id column should exist", updatedMapping); - Assert.assertEquals( - "Mapping should use the original field ID", (Integer) startIdColumnId, updatedMapping.id()); - Assert.assertFalse("Should not use id as a name", updatedMapping.names().contains("id")); - Assert.assertNull("Should not contain a nested mapping", updatedMapping.nestedMapping()); + assertThat(updatedMapping).isNotNull(); + assertThat(updatedMapping.id()).isEqualTo(startIdColumnId); + assertThat(updatedMapping.names()).doesNotContain("id"); + assertThat(updatedMapping.nestedMapping()).isNull(); } - @Test + @TestTemplate public void testRenameAndAddColumnReassign() { NameMapping mapping = MappingUtil.create(table.schema()); String mappingJson = NameMappingParser.toJson(mapping); @@ -314,10 +283,8 @@ public void testRenameAndAddColumnReassign() { NameMapping afterRename = NameMappingParser.fromJson(table.properties().get(TableProperties.DEFAULT_NAME_MAPPING)); - Assert.assertEquals( - "Renamed column should have both names", - Sets.newHashSet("id", "object_id"), - afterRename.find(startIdColumnId).names()); + assertThat(afterRename.find(startIdColumnId).names()) + .isEqualTo(Sets.newHashSet("id", "object_id")); // add a new column with the renamed column's old name // also, rename the original column again to ensure its names are handled correctly @@ -338,21 +305,18 @@ public void testRenameAndAddColumnReassign() { updated); MappedField newMapping = updated.find("id"); - Assert.assertNotNull("Mapping for id column should exist", newMapping); - Assert.assertEquals( - "Mapping should use the new field ID", (Integer) idColumnId, newMapping.id()); - Assert.assertNull("Should not contain a nested mapping", newMapping.nestedMapping()); + assertThat(newMapping).isNotNull(); + 
assertThat(newMapping.id()).isEqualTo(idColumnId); + assertThat(newMapping.nestedMapping()).isNull(); MappedField updatedMapping = updated.find(startIdColumnId); - Assert.assertNotNull("Mapping for original id column should exist", updatedMapping); - Assert.assertEquals( - "Mapping should use the original field ID", (Integer) startIdColumnId, updatedMapping.id()); - Assert.assertEquals( - "Should not use id as a name", Sets.newHashSet("object_id", "oid"), updatedMapping.names()); - Assert.assertNull("Should not contain a nested mapping", updatedMapping.nestedMapping()); + assertThat(updatedMapping).isNotNull(); + assertThat(updatedMapping.id()).isEqualTo(startIdColumnId); + assertThat(updatedMapping.names()).isEqualTo(Sets.newHashSet("object_id", "oid")); + assertThat(updatedMapping.nestedMapping()).isNull(); } - @Test + @TestTemplate public void testRenameAndRenameColumnReassign() { NameMapping mapping = MappingUtil.create(table.schema()); String mappingJson = NameMappingParser.toJson(mapping); @@ -365,10 +329,8 @@ public void testRenameAndRenameColumnReassign() { NameMapping afterRename = NameMappingParser.fromJson(table.properties().get(TableProperties.DEFAULT_NAME_MAPPING)); - Assert.assertEquals( - "Renamed column should have both names", - Sets.newHashSet("id", "object_id"), - afterRename.find(startIdColumnId).names()); + assertThat(afterRename.find(startIdColumnId).names()) + .isEqualTo(Sets.newHashSet("id", "object_id")); // rename the data column to the renamed column's old name // also, rename the original column again to ensure its names are handled correctly @@ -385,28 +347,23 @@ public void testRenameAndRenameColumnReassign() { updated); MappedField newMapping = updated.find("id"); - Assert.assertNotNull("Mapping for id column should exist", newMapping); - Assert.assertEquals( - "Renamed column should have both names", Sets.newHashSet("id", "data"), newMapping.names()); - Assert.assertEquals( - "Mapping should use the new field ID", (Integer) 
idColumnId, newMapping.id()); - Assert.assertNull("Should not contain a nested mapping", newMapping.nestedMapping()); + assertThat(newMapping).isNotNull(); + assertThat(newMapping.names()).isEqualTo(Sets.newHashSet("id", "data")); + assertThat(newMapping.id()).isEqualTo(idColumnId); + assertThat(newMapping.nestedMapping()).isNull(); MappedField updatedMapping = updated.find(startIdColumnId); - Assert.assertNotNull("Mapping for original id column should exist", updatedMapping); - Assert.assertEquals( - "Mapping should use the original field ID", (Integer) startIdColumnId, updatedMapping.id()); - Assert.assertEquals( - "Should not use id as a name", Sets.newHashSet("object_id", "oid"), updatedMapping.names()); - Assert.assertNull("Should not contain a nested mapping", updatedMapping.nestedMapping()); + assertThat(updatedMapping).isNotNull(); + assertThat(updatedMapping.id()).isEqualTo(startIdColumnId); + assertThat(updatedMapping.names()).isEqualTo(Sets.newHashSet("object_id", "oid")); + assertThat(updatedMapping.nestedMapping()).isNull(); } /** Asserts that the fields in the original mapping are unchanged in the updated mapping. 
*/ private void validateUnchanged(NameMapping original, NameMapping updated) { MappedFields updatedFields = updated.asMappedFields(); for (MappedField field : original.asMappedFields().fields()) { - Assert.assertEquals( - "Existing fields should not change", field, updatedFields.field(field.id())); + assertThat(updatedFields.field(field.id())).isEqualTo(field); } } @@ -414,8 +371,7 @@ private void validateUnchanged(NameMapping original, NameMapping updated) { private void validateUnchanged(Iterable fields, NameMapping updated) { MappedFields updatedFields = updated.asMappedFields(); for (MappedField field : fields) { - Assert.assertEquals( - "Existing fields should not change", field, updatedFields.field(field.id())); + assertThat(updatedFields.field(field.id())).isEqualTo(field); } } } diff --git a/core/src/test/java/org/apache/iceberg/TestSchemaID.java b/core/src/test/java/org/apache/iceberg/TestSchemaID.java index f27fd92fa3c1..33ac84d20801 100644 --- a/core/src/test/java/org/apache/iceberg/TestSchemaID.java +++ b/core/src/test/java/org/apache/iceberg/TestSchemaID.java @@ -20,32 +20,26 @@ import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; import java.util.Arrays; +import java.util.List; import java.util.Map; import java.util.function.Function; import java.util.stream.Collectors; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Types; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; -@RunWith(Parameterized.class) -public class TestSchemaID extends TableTestBase { +@ExtendWith(ParameterizedTestExtension.class) +public class 
TestSchemaID extends TestBase { - @Parameterized.Parameters(name = "formatVersion = {0}") - public static Object[] parameters() { - return new Object[] {1, 2}; + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(1, 2); } - public TestSchemaID(int formatVersion) { - super(formatVersion); - } - - @Test + @TestTemplate public void testNoChange() { int onlyId = table.schema().schemaId(); Map onlySchemaMap = schemaMap(table.schema()); @@ -54,46 +48,30 @@ public void testNoChange() { table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); TestHelpers.assertSameSchemaMap(onlySchemaMap, table.schemas()); - Assert.assertEquals( - "Current snapshot's schemaId should be the current", - table.schema().schemaId(), - (int) table.currentSnapshot().schemaId()); + assertThat(table.currentSnapshot().schemaId()).isEqualTo(table.schema().schemaId()); - Assert.assertEquals( - "Schema ids should be correct in snapshots", - ImmutableList.of(onlyId), - Lists.transform(Lists.newArrayList(table.snapshots()), Snapshot::schemaId)); + assertThat(table.snapshots()).extracting(Snapshot::schemaId).containsExactly(onlyId); // remove file from table table.newDelete().deleteFile(FILE_A).commit(); TestHelpers.assertSameSchemaMap(onlySchemaMap, table.schemas()); - Assert.assertEquals( - "Current snapshot's schemaId should be the current", - table.schema().schemaId(), - (int) table.currentSnapshot().schemaId()); + assertThat(table.currentSnapshot().schemaId()).isEqualTo(table.schema().schemaId()); - Assert.assertEquals( - "Schema ids should be correct in snapshots", - ImmutableList.of(onlyId, onlyId), - Lists.transform(Lists.newArrayList(table.snapshots()), Snapshot::schemaId)); + assertThat(table.snapshots()).extracting(Snapshot::schemaId).containsExactly(onlyId, onlyId); // add file to table table.newFastAppend().appendFile(FILE_A2).commit(); TestHelpers.assertSameSchemaMap(onlySchemaMap, table.schemas()); - Assert.assertEquals( - 
"Current snapshot's schemaId should be the current", - table.schema().schemaId(), - (int) table.currentSnapshot().schemaId()); - - Assert.assertEquals( - "Schema ids should be correct in snapshots", - ImmutableList.of(onlyId, onlyId, onlyId), - Lists.transform(Lists.newArrayList(table.snapshots()), Snapshot::schemaId)); + assertThat(table.currentSnapshot().schemaId()).isEqualTo(table.schema().schemaId()); + + assertThat(table.snapshots()) + .extracting(Snapshot::schemaId) + .containsExactly(onlyId, onlyId, onlyId); } - @Test + @TestTemplate public void testSchemaIdChangeInSchemaUpdate() { Schema originalSchema = table.schema(); @@ -101,15 +79,11 @@ public void testSchemaIdChangeInSchemaUpdate() { table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); TestHelpers.assertSameSchemaMap(schemaMap(table.schema()), table.schemas()); - Assert.assertEquals( - "Current snapshot's schemaId should be the current", - table.schema().schemaId(), - (int) table.currentSnapshot().schemaId()); + assertThat(table.currentSnapshot().schemaId()).isEqualTo(table.schema().schemaId()); - Assert.assertEquals( - "Schema ids should be correct in snapshots", - ImmutableList.of(originalSchema.schemaId()), - Lists.transform(Lists.newArrayList(table.snapshots()), Snapshot::schemaId)); + assertThat(table.snapshots()) + .extracting(Snapshot::schemaId) + .containsExactly(originalSchema.schemaId()); // update schema table.updateSchema().addColumn("data2", Types.StringType.get()).commit(); @@ -122,50 +96,37 @@ public void testSchemaIdChangeInSchemaUpdate() { optional(3, "data2", Types.StringType.get())); TestHelpers.assertSameSchemaMap(schemaMap(originalSchema, updatedSchema), table.schemas()); - Assert.assertEquals( - "Current snapshot's schemaId should be old since update schema doesn't create new snapshot", - originalSchema.schemaId(), - (int) table.currentSnapshot().schemaId()); - Assert.assertEquals( - "Current schema should match", updatedSchema.asStruct(), table.schema().asStruct()); 
- - Assert.assertEquals( - "Schema ids should be correct in snapshots", - ImmutableList.of(originalSchema.schemaId()), - Lists.transform(Lists.newArrayList(table.snapshots()), Snapshot::schemaId)); + assertThat(table.currentSnapshot().schemaId()) + .as( + "Current snapshot's schemaId should be old since update schema doesn't create new snapshot") + .isEqualTo(originalSchema.schemaId()); + assertThat(table.schema().asStruct()).isEqualTo(updatedSchema.asStruct()); + + assertThat(table.snapshots()) + .extracting(Snapshot::schemaId) + .containsExactly(originalSchema.schemaId()); // remove file from table table.newDelete().deleteFile(FILE_A).commit(); TestHelpers.assertSameSchemaMap(schemaMap(originalSchema, updatedSchema), table.schemas()); - Assert.assertEquals( - "Current snapshot's schemaId should be the current", - updatedSchema.schemaId(), - (int) table.currentSnapshot().schemaId()); - Assert.assertEquals( - "Current schema should match", updatedSchema.asStruct(), table.schema().asStruct()); - - Assert.assertEquals( - "Schema ids should be correct in snapshots", - ImmutableList.of(originalSchema.schemaId(), updatedSchema.schemaId()), - Lists.transform(Lists.newArrayList(table.snapshots()), Snapshot::schemaId)); + assertThat(table.currentSnapshot().schemaId()).isEqualTo(updatedSchema.schemaId()); + assertThat(table.schema().asStruct()).isEqualTo(updatedSchema.asStruct()); + assertThat(table.snapshots()) + .extracting(Snapshot::schemaId) + .containsExactly(originalSchema.schemaId(), updatedSchema.schemaId()); // add files to table table.newAppend().appendFile(FILE_A2).commit(); TestHelpers.assertSameSchemaMap(schemaMap(originalSchema, updatedSchema), table.schemas()); - Assert.assertEquals( - "Current snapshot's schemaId should be the current", - updatedSchema.schemaId(), - (int) table.currentSnapshot().schemaId()); - Assert.assertEquals( - "Current schema should match", updatedSchema.asStruct(), table.schema().asStruct()); - - Assert.assertEquals( - "Schema ids 
should be correct in snapshots", - ImmutableList.of( - originalSchema.schemaId(), updatedSchema.schemaId(), updatedSchema.schemaId()), - Lists.transform(Lists.newArrayList(table.snapshots()), Snapshot::schemaId)); + assertThat(table.currentSnapshot().schemaId()).isEqualTo(updatedSchema.schemaId()); + assertThat(table.schema().asStruct()).isEqualTo(updatedSchema.asStruct()); + + assertThat(table.snapshots()) + .extracting(Snapshot::schemaId) + .containsExactly( + originalSchema.schemaId(), updatedSchema.schemaId(), updatedSchema.schemaId()); } private Map schemaMap(Schema... schemas) { diff --git a/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java b/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java index 1d903dfbb1a5..943ff9f51acd 100644 --- a/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java +++ b/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java @@ -20,6 +20,8 @@ import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.util.List; import java.util.Set; @@ -30,9 +32,7 @@ import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.Pair; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestSchemaUpdate { private static final Schema SCHEMA = @@ -84,7 +84,7 @@ public class TestSchemaUpdate { @Test public void testNoChanges() { Schema identical = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID).apply(); - Assert.assertEquals("Should not include any changes", SCHEMA.asStruct(), identical.asStruct()); + assertThat(identical.asStruct()).isEqualTo(SCHEMA.asStruct()); } @Test @@ -114,10 +114,7 @@ public void testDeleteFields() { Schema del = new SchemaUpdate(SCHEMA, 
19).deleteColumn(name).apply(); - Assert.assertEquals( - "Should match projection with '" + name + "' removed", - TypeUtil.project(SCHEMA, selected).asStruct(), - del.asStruct()); + assertThat(del.asStruct()).isEqualTo(TypeUtil.project(SCHEMA, selected).asStruct()); } } @@ -148,10 +145,7 @@ public void testDeleteFieldsCaseSensitiveDisabled() { Schema del = new SchemaUpdate(SCHEMA, 19).caseSensitive(false).deleteColumn(name).apply(); - Assert.assertEquals( - "Should match projection with '" + name + "' removed", - TypeUtil.project(SCHEMA, selected).asStruct(), - del.asStruct()); + assertThat(del.asStruct()).isEqualTo(TypeUtil.project(SCHEMA, selected).asStruct()); } } @@ -206,7 +200,7 @@ public void testUpdateTypes() { .updateColumn("locations.long", Types.DoubleType.get()) .apply(); - Assert.assertEquals("Should convert types", expected, updated.asStruct()); + assertThat(updated.asStruct()).isEqualTo(expected); } @Test @@ -261,7 +255,7 @@ public void testUpdateTypesCaseInsensitive() { .updateColumn("Locations.Long", Types.DoubleType.get()) .apply(); - Assert.assertEquals("Should convert types", expected, updated.asStruct()); + assertThat(updated.asStruct()).isEqualTo(expected); } @Test @@ -299,13 +293,12 @@ public void testUpdateFailure() { if (fromType.equals(toType) || allowedUpdates.contains(Pair.of(fromType, toType))) { Schema expected = new Schema(required(1, "col", toType)); Schema result = new SchemaUpdate(fromSchema, 1).updateColumn("col", toType).apply(); - Assert.assertEquals("Should allow update", expected.asStruct(), result.asStruct()); + assertThat(result.asStruct()).isEqualTo(expected.asStruct()); continue; } String typeChange = fromType + " -> " + toType.toString(); - Assertions.assertThatThrownBy( - () -> new SchemaUpdate(fromSchema, 1).updateColumn("col", toType)) + assertThatThrownBy(() -> new SchemaUpdate(fromSchema, 1).updateColumn("col", toType)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot change column type: col: " + 
typeChange); } @@ -366,7 +359,7 @@ public void testRename() { .renameColumn("points.y", "y.y") // has a '.' in the field name .apply(); - Assert.assertEquals("Should rename all fields", expected, renamed.asStruct()); + assertThat(renamed.asStruct()).isEqualTo(expected); } @Test @@ -424,7 +417,7 @@ public void testRenameCaseInsensitive() { .renameColumn("Points.y", "y.y") // has a '.' in the field name .apply(); - Assert.assertEquals("Should rename all fields", expected, renamed.asStruct()); + assertThat(renamed.asStruct()).isEqualTo(expected); } @Test @@ -483,7 +476,7 @@ public void testAddFields() { .addColumn("points", "t.t", Types.LongType.get()) // name with '.' .apply(); - Assert.assertEquals("Should match with added fields", expected.asStruct(), added.asStruct()); + assertThat(added.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -506,8 +499,7 @@ public void testAddNestedStruct() { Schema result = new SchemaUpdate(schema, 1).addColumn("location", struct).apply(); - Assert.assertEquals( - "Should add struct and reassign column IDs", expected.asStruct(), result.asStruct()); + assertThat(result.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -546,8 +538,7 @@ public void testAddNestedMapOfStructs() { Schema result = new SchemaUpdate(schema, 1).addColumn("locations", map).apply(); - Assert.assertEquals( - "Should add map and reassign column IDs", expected.asStruct(), result.asStruct()); + assertThat(result.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -574,8 +565,7 @@ public void testAddNestedListOfStructs() { Schema result = new SchemaUpdate(schema, 1).addColumn("locations", list).apply(); - Assert.assertEquals( - "Should add map and reassign column IDs", expected.asStruct(), result.asStruct()); + assertThat(result.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -586,7 +576,7 @@ public void testAddRequiredColumn() { required(1, "id", Types.IntegerType.get()), required(2, "data", Types.StringType.get())); - 
Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(schema, 1).addRequiredColumn("data", Types.StringType.get())) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Incompatible change: cannot add required column: data"); @@ -597,14 +587,14 @@ public void testAddRequiredColumn() { .addRequiredColumn("data", Types.StringType.get()) .apply(); - Assert.assertEquals("Should add required column", expected.asStruct(), result.asStruct()); + assertThat(result.asStruct()).isEqualTo(expected.asStruct()); } @Test public void testAddRequiredColumnCaseInsensitive() { Schema schema = new Schema(required(1, "id", Types.IntegerType.get())); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(schema, 1) .caseSensitive(false) @@ -622,8 +612,7 @@ public void testMakeColumnOptional() { Schema result = new SchemaUpdate(schema, 1).makeColumnOptional("id").apply(); - Assert.assertEquals( - "Should update column to be optional", expected.asStruct(), result.asStruct()); + assertThat(result.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -631,7 +620,7 @@ public void testRequireColumn() { Schema schema = new Schema(optional(1, "id", Types.IntegerType.get())); Schema expected = new Schema(required(1, "id", Types.IntegerType.get())); - Assertions.assertThatThrownBy(() -> new SchemaUpdate(schema, 1).requireColumn("id")) + assertThatThrownBy(() -> new SchemaUpdate(schema, 1).requireColumn("id")) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot change column nullability: id: optional -> required"); @@ -641,8 +630,7 @@ public void testRequireColumn() { Schema result = new SchemaUpdate(schema, 1).allowIncompatibleChanges().requireColumn("id").apply(); - Assert.assertEquals( - "Should update column to be required", expected.asStruct(), result.asStruct()); + assertThat(result.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -657,8 +645,7 @@ public void testRequireColumnCaseInsensitive() { .requireColumn("ID") 
.apply(); - Assert.assertEquals( - "Should update column to be required", expected.asStruct(), result.asStruct()); + assertThat(result.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -729,13 +716,13 @@ public void testMixedChanges() { "locations", "description", Types.StringType.get(), "Location description") .apply(); - Assert.assertEquals("Should match with added fields", expected.asStruct(), updated.asStruct()); + assertThat(updated.asStruct()).isEqualTo(expected.asStruct()); } @Test public void testAmbiguousAdd() { // preferences.booleans could be top-level or a field of preferences - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> { UpdateSchema update = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID); update.addColumn("preferences.booleans", Types.BooleanType.get()); @@ -746,7 +733,7 @@ public void testAmbiguousAdd() { @Test public void testAddAlreadyExists() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> { UpdateSchema update = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID); update.addColumn("preferences", "feature1", Types.BooleanType.get()); @@ -754,7 +741,7 @@ public void testAddAlreadyExists() { .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot add column, name already exists: preferences.feature1"); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> { UpdateSchema update = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID); update.addColumn("preferences", Types.BooleanType.get()); @@ -774,7 +761,7 @@ public void testDeleteThenAdd() { .addColumn("id", optional(2, "id", Types.IntegerType.get()).type()) .apply(); - Assert.assertEquals("Should match with added fields", expected.asStruct(), updated.asStruct()); + assertThat(updated.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -827,13 +814,12 @@ public void testDeleteThenAddNested() { .addColumn("preferences", "feature1", Types.BooleanType.get()) .apply(); - Assert.assertEquals( - "Should match with added fields", expectedNested.asStruct(), 
updatedNested.asStruct()); + assertThat(updatedNested.asStruct()).isEqualTo(expectedNested.asStruct()); } @Test public void testDeleteMissingColumn() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> { UpdateSchema update = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID); update.deleteColumn("col"); @@ -844,7 +830,7 @@ public void testDeleteMissingColumn() { @Test public void testAddDeleteConflict() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> { UpdateSchema update = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID); update.addColumn("col", Types.IntegerType.get()).deleteColumn("col"); @@ -852,7 +838,7 @@ public void testAddDeleteConflict() { .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot delete missing column: col"); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> { UpdateSchema update = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID); update @@ -865,7 +851,7 @@ public void testAddDeleteConflict() { @Test public void testRenameMissingColumn() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> { UpdateSchema update = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID); update.renameColumn("col", "fail"); @@ -876,7 +862,7 @@ public void testRenameMissingColumn() { @Test public void testRenameDeleteConflict() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> { UpdateSchema update = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID); update.renameColumn("id", "col").deleteColumn("id"); @@ -884,7 +870,7 @@ public void testRenameDeleteConflict() { .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot delete a column that has updates: id"); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> { UpdateSchema update = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID); update.renameColumn("id", "col").deleteColumn("col"); @@ -895,7 +881,7 @@ public void testRenameDeleteConflict() { @Test public void testDeleteRenameConflict() { - Assertions.assertThatThrownBy( + 
assertThatThrownBy( () -> { UpdateSchema update = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID); update.deleteColumn("id").renameColumn("id", "identifier"); @@ -906,7 +892,7 @@ public void testDeleteRenameConflict() { @Test public void testUpdateMissingColumn() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> { UpdateSchema update = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID); update.updateColumn("col", Types.DateType.get()); @@ -917,7 +903,7 @@ public void testUpdateMissingColumn() { @Test public void testUpdateDeleteConflict() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> { UpdateSchema update = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID); update.updateColumn("id", Types.LongType.get()).deleteColumn("id"); @@ -928,7 +914,7 @@ public void testUpdateDeleteConflict() { @Test public void testDeleteUpdateConflict() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> { UpdateSchema update = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID); update.deleteColumn("id").updateColumn("id", Types.LongType.get()); @@ -939,7 +925,7 @@ public void testDeleteUpdateConflict() { @Test public void testDeleteMapKey() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID) .deleteColumn("locations.key") @@ -950,7 +936,7 @@ public void testDeleteMapKey() { @Test public void testAddFieldToMapKey() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID) .addColumn("locations.key", "address_line_2", Types.StringType.get()) @@ -961,7 +947,7 @@ public void testAddFieldToMapKey() { @Test public void testAlterMapKey() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID) .updateColumn("locations.key.zip", Types.LongType.get()) @@ -978,7 +964,7 @@ public void testUpdateMapKey() { 1, "m", Types.MapType.ofOptional(2, 3, Types.IntegerType.get(), Types.DoubleType.get()))); - 
Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(schema, 3).updateColumn("m.key", Types.LongType.get()).apply()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot update map keys: map"); @@ -987,7 +973,7 @@ public void testUpdateMapKey() { @Test public void testUpdateAddedColumnDoc() { Schema schema = new Schema(required(1, "i", Types.IntegerType.get())); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(schema, 3) .addColumn("value", Types.LongType.get()) @@ -1000,7 +986,7 @@ public void testUpdateAddedColumnDoc() { @Test public void testUpdateDeletedColumnDoc() { Schema schema = new Schema(required(1, "i", Types.IntegerType.get())); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(schema, 3) .deleteColumn("i") @@ -1035,7 +1021,7 @@ public void testMultipleMoves() { .moveBefore("d", "a") .apply(); - Assert.assertEquals("Schema should match", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -1049,7 +1035,7 @@ public void testMoveTopLevelColumnFirst() { Schema actual = new SchemaUpdate(schema, 2).moveFirst("data").apply(); - Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -1063,7 +1049,7 @@ public void testMoveTopLevelColumnBeforeFirst() { Schema actual = new SchemaUpdate(schema, 2).moveBefore("data", "id").apply(); - Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -1077,7 +1063,7 @@ public void testMoveTopLevelColumnAfterLast() { Schema actual = new SchemaUpdate(schema, 2).moveAfter("id", "data").apply(); - Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -1095,7 
+1081,7 @@ public void testMoveTopLevelColumnAfter() { Schema actual = new SchemaUpdate(schema, 3).moveAfter("ts", "id").apply(); - Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -1113,7 +1099,7 @@ public void testMoveTopLevelColumnBefore() { Schema actual = new SchemaUpdate(schema, 3).moveBefore("ts", "data").apply(); - Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -1139,7 +1125,7 @@ public void testMoveNestedFieldFirst() { Schema actual = new SchemaUpdate(schema, 4).moveFirst("struct.data").apply(); - Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -1165,7 +1151,7 @@ public void testMoveNestedFieldBeforeFirst() { Schema actual = new SchemaUpdate(schema, 4).moveBefore("struct.data", "struct.count").apply(); - Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -1191,7 +1177,7 @@ public void testMoveNestedFieldAfterLast() { Schema actual = new SchemaUpdate(schema, 4).moveAfter("struct.count", "struct.data").apply(); - Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -1219,7 +1205,7 @@ public void testMoveNestedFieldAfter() { Schema actual = new SchemaUpdate(schema, 5).moveAfter("struct.ts", "struct.count").apply(); - Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -1247,7 +1233,7 @@ public void testMoveNestedFieldBefore() { Schema actual = new SchemaUpdate(schema, 5).moveBefore("struct.ts", 
"struct.data").apply(); - Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -1279,7 +1265,7 @@ public void testMoveListElementField() { Schema actual = new SchemaUpdate(schema, 6).moveBefore("list.ts", "list.data").apply(); - Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -1315,7 +1301,7 @@ public void testMoveMapValueStructField() { Schema actual = new SchemaUpdate(schema, 7).moveBefore("map.ts", "map.data").apply(); - Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -1335,7 +1321,7 @@ public void testMoveAddedTopLevelColumn() { .moveAfter("ts", "id") .apply(); - Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -1358,7 +1344,7 @@ public void testMoveAddedTopLevelColumnAfterAddedColumn() { .moveAfter("count", "ts") .apply(); - Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -1389,7 +1375,7 @@ public void testMoveAddedNestedStructField() { .moveBefore("struct.ts", "struct.count") .apply(); - Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -1423,7 +1409,7 @@ public void testMoveAddedNestedStructFieldBeforeAddedColumn() { .moveBefore("struct.size", "struct.ts") .apply(); - Assert.assertEquals("Should move data first", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -1432,11 +1418,11 @@ public void testMoveSelfReferenceFails() { new 
Schema( required(1, "id", Types.LongType.get()), required(2, "data", Types.StringType.get())); - Assertions.assertThatThrownBy(() -> new SchemaUpdate(schema, 2).moveBefore("id", "id").apply()) + assertThatThrownBy(() -> new SchemaUpdate(schema, 2).moveBefore("id", "id").apply()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot move id before itself"); - Assertions.assertThatThrownBy(() -> new SchemaUpdate(schema, 2).moveAfter("id", "id").apply()) + assertThatThrownBy(() -> new SchemaUpdate(schema, 2).moveAfter("id", "id").apply()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot move id after itself"); } @@ -1447,17 +1433,15 @@ public void testMoveMissingColumnFails() { new Schema( required(1, "id", Types.LongType.get()), required(2, "data", Types.StringType.get())); - Assertions.assertThatThrownBy(() -> new SchemaUpdate(schema, 2).moveFirst("items").apply()) + assertThatThrownBy(() -> new SchemaUpdate(schema, 2).moveFirst("items").apply()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot move missing column: items"); - Assertions.assertThatThrownBy( - () -> new SchemaUpdate(schema, 2).moveBefore("items", "id").apply()) + assertThatThrownBy(() -> new SchemaUpdate(schema, 2).moveBefore("items", "id").apply()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot move missing column: items"); - Assertions.assertThatThrownBy( - () -> new SchemaUpdate(schema, 2).moveAfter("items", "data").apply()) + assertThatThrownBy(() -> new SchemaUpdate(schema, 2).moveAfter("items", "data").apply()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot move missing column: items"); } @@ -1468,7 +1452,7 @@ public void testMoveBeforeAddFails() { new Schema( required(1, "id", Types.LongType.get()), required(2, "data", Types.StringType.get())); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(schema, 2) .moveFirst("ts") @@ -1477,7 +1461,7 @@ public void testMoveBeforeAddFails() { 
.isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot move missing column: ts"); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(schema, 2) .moveBefore("ts", "id") @@ -1486,7 +1470,7 @@ public void testMoveBeforeAddFails() { .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot move missing column: ts"); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(schema, 2) .moveAfter("ts", "data") @@ -1502,13 +1486,11 @@ public void testMoveMissingReferenceColumnFails() { new Schema( required(1, "id", Types.LongType.get()), required(2, "data", Types.StringType.get())); - Assertions.assertThatThrownBy( - () -> new SchemaUpdate(schema, 2).moveBefore("id", "items").apply()) + assertThatThrownBy(() -> new SchemaUpdate(schema, 2).moveBefore("id", "items").apply()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot move id before missing column: items"); - Assertions.assertThatThrownBy( - () -> new SchemaUpdate(schema, 2).moveAfter("data", "items").apply()) + assertThatThrownBy(() -> new SchemaUpdate(schema, 2).moveAfter("data", "items").apply()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot move data after missing column: items"); } @@ -1524,8 +1506,7 @@ public void testMovePrimitiveMapKeyFails() { "map", Types.MapType.ofRequired(4, 5, Types.StringType.get(), Types.StringType.get()))); - Assertions.assertThatThrownBy( - () -> new SchemaUpdate(schema, 5).moveBefore("map.key", "map.value").apply()) + assertThatThrownBy(() -> new SchemaUpdate(schema, 5).moveBefore("map.key", "map.value").apply()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot move fields in non-struct type: map"); } @@ -1541,8 +1522,7 @@ public void testMovePrimitiveMapValueFails() { "map", Types.MapType.ofRequired(4, 5, Types.StringType.get(), Types.StructType.of()))); - Assertions.assertThatThrownBy( - () -> new SchemaUpdate(schema, 5).moveBefore("map.value", "map.key").apply()) 
+ assertThatThrownBy(() -> new SchemaUpdate(schema, 5).moveBefore("map.value", "map.key").apply()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot move fields in non-struct type: map>"); } @@ -1555,8 +1535,7 @@ public void testMovePrimitiveListElementFails() { required(2, "data", Types.StringType.get()), optional(3, "list", Types.ListType.ofRequired(4, Types.StringType.get()))); - Assertions.assertThatThrownBy( - () -> new SchemaUpdate(schema, 4).moveBefore("list.element", "list").apply()) + assertThatThrownBy(() -> new SchemaUpdate(schema, 4).moveBefore("list.element", "list").apply()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot move fields in non-struct type: list"); } @@ -1574,8 +1553,7 @@ public void testMoveTopLevelBetweenStructsFails() { required(4, "x", Types.IntegerType.get()), required(5, "y", Types.IntegerType.get())))); - Assertions.assertThatThrownBy( - () -> new SchemaUpdate(schema, 5).moveBefore("a", "struct.x").apply()) + assertThatThrownBy(() -> new SchemaUpdate(schema, 5).moveBefore("a", "struct.x").apply()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot move field a to a different struct"); } @@ -1597,8 +1575,7 @@ public void testMoveBetweenStructsFails() { required(5, "x", Types.IntegerType.get()), required(6, "y", Types.IntegerType.get())))); - Assertions.assertThatThrownBy( - () -> new SchemaUpdate(schema, 6).moveBefore("s2.x", "s1.a").apply()) + assertThatThrownBy(() -> new SchemaUpdate(schema, 6).moveBefore("s2.x", "s1.a").apply()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot move field s2.x to a different struct"); } @@ -1608,10 +1585,9 @@ public void testAddExistingIdentifierFields() { Schema newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID).setIdentifierFields("id").apply(); - Assert.assertEquals( - "add an existing field as identifier field should succeed", - Sets.newHashSet(newSchema.findField("id").fieldId()), - newSchema.identifierFieldIds()); + 
assertThat(newSchema.identifierFieldIds()) + .as("add an existing field as identifier field should succeed") + .containsExactly(newSchema.findField("id").fieldId()); } @Test @@ -1623,11 +1599,11 @@ public void testAddNewIdentifierFieldColumns() { .setIdentifierFields("id", "new_field") .apply(); - Assert.assertEquals( - "add column then set as identifier should succeed", - Sets.newHashSet( - newSchema.findField("id").fieldId(), newSchema.findField("new_field").fieldId()), - newSchema.identifierFieldIds()); + assertThat(newSchema.identifierFieldIds()) + .as("add column then set as identifier should succeed") + .isEqualTo( + Sets.newHashSet( + newSchema.findField("id").fieldId(), newSchema.findField("new_field").fieldId())); newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID) @@ -1636,11 +1612,11 @@ public void testAddNewIdentifierFieldColumns() { .addRequiredColumn("new_field", Types.StringType.get()) .apply(); - Assert.assertEquals( - "set identifier then add column should succeed", - Sets.newHashSet( - newSchema.findField("id").fieldId(), newSchema.findField("new_field").fieldId()), - newSchema.identifierFieldIds()); + assertThat(newSchema.identifierFieldIds()) + .as("set identifier then add column should succeed") + .isEqualTo( + Sets.newHashSet( + newSchema.findField("id").fieldId(), newSchema.findField("new_field").fieldId())); } @Test @@ -1660,10 +1636,9 @@ public void testAddNestedIdentifierFieldColumns() { .setIdentifierFields("required_struct.field") .apply(); - Assert.assertEquals( - "set existing nested field as identifier should succeed", - Sets.newHashSet(newSchema.findField("required_struct.field").fieldId()), - newSchema.identifierFieldIds()); + assertThat(newSchema.identifierFieldIds()) + .as("set existing nested field as identifier should succeed") + .containsExactly(newSchema.findField("required_struct.field").fieldId()); newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID) @@ -1676,10 +1651,9 @@ public void 
testAddNestedIdentifierFieldColumns() { .setIdentifierFields("new.field") .apply(); - Assert.assertEquals( - "set newly added nested field as identifier should succeed", - Sets.newHashSet(newSchema.findField("new.field").fieldId()), - newSchema.identifierFieldIds()); + assertThat(newSchema.identifierFieldIds()) + .as("set newly added nested field as identifier should succeed") + .containsExactly(newSchema.findField("new.field").fieldId()); newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID) @@ -1696,10 +1670,9 @@ public void testAddNestedIdentifierFieldColumns() { .setIdentifierFields("new.field.nested") .apply(); - Assert.assertEquals( - "set newly added multi-layer nested field as identifier should succeed", - Sets.newHashSet(newSchema.findField("new.field.nested").fieldId()), - newSchema.identifierFieldIds()); + assertThat(newSchema.identifierFieldIds()) + .as("set newly added multi-layer nested field as identifier should succeed") + .containsExactly(newSchema.findField("new.field.nested").fieldId()); } @Test @@ -1711,11 +1684,11 @@ public void testAddDottedIdentifierFieldColumns() { .setIdentifierFields("id", "dot.field") .apply(); - Assert.assertEquals( - "add a field with dot as identifier should succeed", - Sets.newHashSet( - newSchema.findField("id").fieldId(), newSchema.findField("dot.field").fieldId()), - newSchema.identifierFieldIds()); + assertThat(newSchema.identifierFieldIds()) + .as("add a field with dot as identifier should succeed") + .isEqualTo( + Sets.newHashSet( + newSchema.findField("id").fieldId(), newSchema.findField("dot.field").fieldId())); } @Test @@ -1733,22 +1706,19 @@ public void testRemoveIdentifierFields() { .setIdentifierFields("new_field", "new_field2") .apply(); - Assert.assertEquals( - "remove an identifier field should succeed", - Sets.newHashSet( - newSchema.findField("new_field").fieldId(), - newSchema.findField("new_field2").fieldId()), - newSchema.identifierFieldIds()); + assertThat(newSchema.identifierFieldIds()) + 
.as("remove an identifier field should succeed") + .isEqualTo( + Sets.newHashSet( + newSchema.findField("new_field").fieldId(), + newSchema.findField("new_field2").fieldId())); newSchema = new SchemaUpdate(newSchema, SCHEMA_LAST_COLUMN_ID) .setIdentifierFields(Sets.newHashSet()) .apply(); - Assert.assertEquals( - "remove all identifier fields should succeed", - Sets.newHashSet(), - newSchema.identifierFieldIds()); + assertThat(newSchema.identifierFieldIds()).isEmpty(); } @SuppressWarnings("MethodLength") @@ -1760,29 +1730,25 @@ public void testSetIdentifierFieldsFails() { required(2, "float", Types.FloatType.get()), required(3, "double", Types.DoubleType.get())); - Assertions.assertThatThrownBy( - () -> new Schema(testSchema.asStruct().fields(), ImmutableSet.of(999))) + assertThatThrownBy(() -> new Schema(testSchema.asStruct().fields(), ImmutableSet.of(999))) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot add fieldId 999 as an identifier field: field does not exist"); - Assertions.assertThatThrownBy( - () -> new Schema(testSchema.asStruct().fields(), ImmutableSet.of(1))) + assertThatThrownBy(() -> new Schema(testSchema.asStruct().fields(), ImmutableSet.of(1))) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot add field id as an identifier field: not a required field"); - Assertions.assertThatThrownBy( - () -> new Schema(testSchema.asStruct().fields(), ImmutableSet.of(2))) + assertThatThrownBy(() -> new Schema(testSchema.asStruct().fields(), ImmutableSet.of(2))) .isInstanceOf(IllegalArgumentException.class) .hasMessage( "Cannot add field float as an identifier field: must not be float or double field"); - Assertions.assertThatThrownBy( - () -> new Schema(testSchema.asStruct().fields(), ImmutableSet.of(3))) + assertThatThrownBy(() -> new Schema(testSchema.asStruct().fields(), ImmutableSet.of(3))) .isInstanceOf(IllegalArgumentException.class) .hasMessage( "Cannot add field double as an identifier field: must not be float or double 
field"); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID) .setIdentifierFields("unknown") @@ -1791,7 +1757,7 @@ public void testSetIdentifierFieldsFails() { .hasMessage( "Cannot add field unknown as an identifier field: not found in current schema or added columns"); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID) .setIdentifierFields("locations") @@ -1800,13 +1766,13 @@ public void testSetIdentifierFieldsFails() { .hasMessage( "Cannot add field locations as an identifier field: not a primitive type field"); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID).setIdentifierFields("data").apply()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot add field data as an identifier field: not a required field"); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID) .setIdentifierFields("locations.key.zip") @@ -1816,7 +1782,7 @@ public void testSetIdentifierFieldsFails() { "Cannot add field zip as an identifier field: must not be nested in " + SCHEMA.findField("locations")); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID) .setIdentifierFields("points.element.x") @@ -1865,7 +1831,7 @@ public void testSetIdentifierFieldsFails() { int lastColId = SCHEMA_LAST_COLUMN_ID + 15; - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(newSchema, lastColId) .setIdentifierFields("required_list.element.x") @@ -1875,19 +1841,19 @@ public void testSetIdentifierFieldsFails() { "Cannot add field x as an identifier field: must not be nested in " + newSchema.findField("required_list")); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(newSchema, lastColId).setIdentifierFields("col_double").apply()) 
.isInstanceOf(IllegalArgumentException.class) .hasMessage( "Cannot add field col_double as an identifier field: must not be float or double field"); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(newSchema, lastColId).setIdentifierFields("col_float").apply()) .isInstanceOf(IllegalArgumentException.class) .hasMessage( "Cannot add field col_float as an identifier field: must not be float or double field"); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(newSchema, lastColId) .setIdentifierFields("new_map.value.val_col") @@ -1897,7 +1863,7 @@ public void testSetIdentifierFieldsFails() { "Cannot add field val_col as an identifier field: must not be nested in " + newSchema.findField("new_map")); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(newSchema, lastColId) .setIdentifierFields("new.fields.element.nested") @@ -1907,7 +1873,7 @@ public void testSetIdentifierFieldsFails() { "Cannot add field nested as an identifier field: must not be nested in " + newSchema.findField("new.fields")); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(newSchema, lastColId) .setIdentifierFields("preferences.feature1") @@ -1923,23 +1889,23 @@ public void testDeleteIdentifierFieldColumns() { Schema schemaWithIdentifierFields = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID).setIdentifierFields("id").apply(); - Assert.assertEquals( - "delete column and then reset identifier field should succeed", - Sets.newHashSet(), - new SchemaUpdate(schemaWithIdentifierFields, SCHEMA_LAST_COLUMN_ID) - .deleteColumn("id") - .setIdentifierFields(Sets.newHashSet()) - .apply() - .identifierFieldIds()); + assertThat( + new SchemaUpdate(schemaWithIdentifierFields, SCHEMA_LAST_COLUMN_ID) + .deleteColumn("id") + .setIdentifierFields(Sets.newHashSet()) + .apply() + .identifierFieldIds()) + .as("delete column and then reset identifier field should succeed") + .isEmpty(); - Assert.assertEquals( - 
"delete reset identifier field and then delete column should succeed", - Sets.newHashSet(), - new SchemaUpdate(schemaWithIdentifierFields, SCHEMA_LAST_COLUMN_ID) - .setIdentifierFields(Sets.newHashSet()) - .deleteColumn("id") - .apply() - .identifierFieldIds()); + assertThat( + new SchemaUpdate(schemaWithIdentifierFields, SCHEMA_LAST_COLUMN_ID) + .setIdentifierFields(Sets.newHashSet()) + .deleteColumn("id") + .apply() + .identifierFieldIds()) + .as("delete reset identifier field and then delete column should succeed") + .isEmpty(); } @Test @@ -1947,7 +1913,7 @@ public void testDeleteIdentifierFieldColumnsFails() { Schema schemaWithIdentifierFields = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID).setIdentifierFields("id").apply(); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(schemaWithIdentifierFields, SCHEMA_LAST_COLUMN_ID) .deleteColumn("id") @@ -1970,7 +1936,7 @@ public void testDeleteContainingNestedIdentifierFieldColumnsFails() { .setIdentifierFields("out.nested") .apply(); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new SchemaUpdate(newSchema, SCHEMA_LAST_COLUMN_ID + 2).deleteColumn("out").apply()) .isInstanceOf(IllegalArgumentException.class) @@ -1989,10 +1955,9 @@ public void testRenameIdentifierFields() { .renameColumn("id", "id2") .apply(); - Assert.assertEquals( - "rename should not affect identifier fields", - Sets.newHashSet(SCHEMA.findField("id").fieldId()), - newSchema.identifierFieldIds()); + assertThat(newSchema.identifierFieldIds()) + .as("rename should not affect identifier fields") + .containsExactly(SCHEMA.findField("id").fieldId()); } @Test @@ -2005,28 +1970,25 @@ public void testMoveIdentifierFields() { .moveAfter("id", "locations") .apply(); - Assert.assertEquals( - "move after should not affect identifier fields", - Sets.newHashSet(SCHEMA.findField("id").fieldId()), - newSchema.identifierFieldIds()); + assertThat(newSchema.identifierFieldIds()) + .as("move after should not affect identifier 
fields") + .containsExactly(SCHEMA.findField("id").fieldId()); newSchema = new SchemaUpdate(schemaWithIdentifierFields, SCHEMA_LAST_COLUMN_ID) .moveBefore("id", "locations") .apply(); - Assert.assertEquals( - "move before should not affect identifier fields", - Sets.newHashSet(SCHEMA.findField("id").fieldId()), - newSchema.identifierFieldIds()); + assertThat(newSchema.identifierFieldIds()) + .as("move before should not affect identifier fields") + .containsExactly(SCHEMA.findField("id").fieldId()); newSchema = new SchemaUpdate(schemaWithIdentifierFields, SCHEMA_LAST_COLUMN_ID).moveFirst("id").apply(); - Assert.assertEquals( - "move first should not affect identifier fields", - Sets.newHashSet(SCHEMA.findField("id").fieldId()), - newSchema.identifierFieldIds()); + assertThat(newSchema.identifierFieldIds()) + .as("move first should not affect identifier fields") + .containsExactly(SCHEMA.findField("id").fieldId()); } @Test @@ -2040,10 +2002,9 @@ public void testMoveIdentifierFieldsCaseInsensitive() { .moveAfter("iD", "locations") .apply(); - Assert.assertEquals( - "move after should not affect identifier fields", - Sets.newHashSet(SCHEMA.findField("id").fieldId()), - newSchema.identifierFieldIds()); + assertThat(newSchema.identifierFieldIds()) + .as("move after should not affect identifier fields") + .containsExactly(SCHEMA.findField("id").fieldId()); newSchema = new SchemaUpdate(schemaWithIdentifierFields, SCHEMA_LAST_COLUMN_ID) @@ -2051,10 +2012,9 @@ public void testMoveIdentifierFieldsCaseInsensitive() { .moveBefore("ID", "locations") .apply(); - Assert.assertEquals( - "move before should not affect identifier fields", - Sets.newHashSet(SCHEMA.findField("id").fieldId()), - newSchema.identifierFieldIds()); + assertThat(newSchema.identifierFieldIds()) + .as("move before should not affect identifier fields") + .containsExactly(SCHEMA.findField("id").fieldId()); newSchema = new SchemaUpdate(schemaWithIdentifierFields, SCHEMA_LAST_COLUMN_ID) @@ -2062,10 +2022,9 @@ 
public void testMoveIdentifierFieldsCaseInsensitive() { .moveFirst("ID") .apply(); - Assert.assertEquals( - "move first should not affect identifier fields", - Sets.newHashSet(SCHEMA.findField("id").fieldId()), - newSchema.identifierFieldIds()); + assertThat(newSchema.identifierFieldIds()) + .as("move first should not affect identifier fields") + .containsExactly(SCHEMA.findField("id").fieldId()); } @Test @@ -2088,8 +2047,7 @@ public void testMoveTopDeletedColumnAfterAnotherColumn() { .addRequiredColumn("id", Types.IntegerType.get()) .moveAfter("id", "data") .apply(); - Assert.assertEquals( - "Should move deleted column correctly", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -2112,8 +2070,7 @@ public void testMoveTopDeletedColumnBeforeAnotherColumn() { .addRequiredColumn("id", Types.IntegerType.get()) .moveBefore("id", "data_1") .apply(); - Assert.assertEquals( - "Should move deleted column correctly", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -2136,8 +2093,7 @@ public void testMoveTopDeletedColumnToFirst() { .addRequiredColumn("id", Types.IntegerType.get()) .moveFirst("id") .apply(); - Assert.assertEquals( - "Should move deleted column correctly", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -2171,8 +2127,7 @@ public void testMoveDeletedNestedStructFieldAfterAnotherColumn() { .moveAfter("struct.data", "struct.count") .apply(); - Assert.assertEquals( - "Should move deleted nested column correctly", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -2206,8 +2161,7 @@ public void testMoveDeletedNestedStructFieldBeforeAnotherColumn() { .moveBefore("struct.data", "struct.data_1") .apply(); - Assert.assertEquals( - "Should move deleted nested column correctly", expected.asStruct(), 
actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } @Test @@ -2241,7 +2195,6 @@ public void testMoveDeletedNestedStructFieldToFirst() { .moveFirst("struct.data") .apply(); - Assert.assertEquals( - "Should move deleted nested column correctly", expected.asStruct(), actual.asStruct()); + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } } diff --git a/core/src/test/java/org/apache/iceberg/TestTableUpdatePartitionSpec.java b/core/src/test/java/org/apache/iceberg/TestTableUpdatePartitionSpec.java index f3bfdf669e33..a4e587068e74 100644 --- a/core/src/test/java/org/apache/iceberg/TestTableUpdatePartitionSpec.java +++ b/core/src/test/java/org/apache/iceberg/TestTableUpdatePartitionSpec.java @@ -21,38 +21,33 @@ import static org.apache.iceberg.expressions.Expressions.bucket; import static org.apache.iceberg.expressions.Expressions.truncate; import static org.apache.iceberg.expressions.Expressions.year; +import static org.assertj.core.api.Assertions.assertThat; +import java.util.Arrays; +import java.util.List; import org.apache.iceberg.transforms.Transforms; import org.apache.iceberg.types.Types; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class TestTableUpdatePartitionSpec extends TableTestBase { - - @Parameterized.Parameters - public static Object[][] parameters() { - return new Object[][] { - new Object[] {1}, new Object[] {2}, - }; - } +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(ParameterizedTestExtension.class) +public class TestTableUpdatePartitionSpec extends TestBase { - public TestTableUpdatePartitionSpec(int formatVersion) { - super(formatVersion); + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(1, 2); } - 
@Before + @BeforeEach public void verifyInitialSpec() { PartitionSpec initialSpec = PartitionSpec.builderFor(table.schema()).bucket("data", 16).build(); - Assert.assertEquals("Should use the expected initial spec", initialSpec, table.spec()); - Assert.assertEquals(1000, table.spec().lastAssignedFieldId()); - Assert.assertEquals(0, table.spec().specId()); + assertThat(table.spec()).isEqualTo(initialSpec); + assertThat(table.spec().lastAssignedFieldId()).isEqualTo(1000); + assertThat(table.spec().specId()).isEqualTo(0); } - @Test + @TestTemplate public void testCommitUpdatedSpec() { table.updateSpec().addField(bucket("id", 8)).commit(); @@ -62,8 +57,10 @@ public void testCommitUpdatedSpec() { .bucket("data", 16) .bucket("id", 8, "id_bucket_8") .build(); - Assert.assertEquals("Should append a partition field to the spec", evolvedSpec, table.spec()); - Assert.assertEquals(1001, table.spec().lastAssignedFieldId()); + assertThat(table.spec()) + .as("Should append a partition field to the spec") + .isEqualTo(evolvedSpec); + assertThat(table.spec().lastAssignedFieldId()).isEqualTo(1001); table .updateSpec() @@ -90,10 +87,10 @@ public void testCommitUpdatedSpec() { .build(), table.spec()); - Assert.assertEquals(1002, table.spec().lastAssignedFieldId()); + assertThat(table.spec().lastAssignedFieldId()).isEqualTo(1002); } - @Test + @TestTemplate public void testNoopCommit() { TableMetadata current = table.ops().current(); int currentVersion = TestTables.metadataVersion("test"); @@ -102,20 +99,20 @@ public void testNoopCommit() { table.updateSpec().commit(); TableMetadata updated = table.ops().current(); Integer updatedVersion = TestTables.metadataVersion("test"); - Assert.assertEquals(current, updated); + assertThat(updated).isEqualTo(current); currentVersion += 1; - Assert.assertEquals(currentVersion, updatedVersion.intValue()); + assertThat(updatedVersion).isEqualTo(currentVersion); // no-op commit due to no-op rename table.updateSpec().renameField("data_bucket", 
"data_bucket").commit(); updated = table.ops().current(); updatedVersion = TestTables.metadataVersion("test"); - Assert.assertEquals(current, updated); + assertThat(updated).isEqualTo(current); currentVersion += 1; - Assert.assertEquals(currentVersion, updatedVersion.intValue()); + assertThat(updatedVersion).isEqualTo(currentVersion); } - @Test + @TestTemplate public void testRenameField() { table .updateSpec() @@ -130,8 +127,8 @@ public void testRenameField() { .bucket("id", 8, "id_bucket_8") .build(); - Assert.assertEquals("should match evolved spec", evolvedSpec, table.spec()); - Assert.assertEquals(1001, table.spec().lastAssignedFieldId()); + assertThat(table.spec()).isEqualTo(evolvedSpec); + assertThat(table.spec().lastAssignedFieldId()).isEqualTo(1001); table .updateSpec() @@ -147,11 +144,11 @@ public void testRenameField() { .truncate("id", 4, "id_trunc_4") .build(); - Assert.assertEquals("should match evolved spec", evolvedSpec, table.spec()); - Assert.assertEquals(1002, table.spec().lastAssignedFieldId()); + assertThat(table.spec()).isEqualTo(evolvedSpec); + assertThat(table.spec().lastAssignedFieldId()).isEqualTo(1002); } - @Test + @TestTemplate public void testRenameOnlyEvolution() { table.updateSpec().renameField("data_bucket", "data_partition").commit(); @@ -161,11 +158,11 @@ public void testRenameOnlyEvolution() { .bucket("data", 16, "data_partition") .build(); - Assert.assertEquals("should match evolved spec", evolvedSpec, table.spec()); - Assert.assertEquals(1000, table.spec().lastAssignedFieldId()); + assertThat(table.spec()).isEqualTo(evolvedSpec); + assertThat(table.spec().lastAssignedFieldId()).isEqualTo(1000); } - @Test + @TestTemplate public void testRemoveAndAddField() { table.updateSpec().removeField("data_bucket").addField(bucket("id", 8)).commit(); @@ -186,10 +183,10 @@ public void testRemoveAndAddField() { .build(), table.spec()); - Assert.assertEquals(1001, table.spec().lastAssignedFieldId()); + 
assertThat(table.spec().lastAssignedFieldId()).isEqualTo(1001); } - @Test + @TestTemplate public void testRemoveAndAddYearField() { table.updateSchema().addColumn("year_field", Types.DateType.get()).commit(); table.updateSpec().addField(year("year_field")).commit(); @@ -201,8 +198,8 @@ public void testRemoveAndAddYearField() { .year("year_field") .build(); - Assert.assertEquals("should match evolved spec", evolvedSpec, table.spec()); - Assert.assertEquals(1001, table.spec().lastAssignedFieldId()); + assertThat(table.spec()).isEqualTo(evolvedSpec); + assertThat(table.spec().lastAssignedFieldId()).isEqualTo(1001); table.updateSpec().removeField("year_field_year").addField(year("year_field")).commit(); @@ -224,10 +221,10 @@ public void testRemoveAndAddYearField() { .build(), table.spec()); - Assert.assertEquals(1001, table.spec().lastAssignedFieldId()); + assertThat(table.spec().lastAssignedFieldId()).isEqualTo(1001); } - @Test + @TestTemplate public void testAddAndRemoveField() { table.updateSpec().addField(bucket("data", 6)).removeField("data_bucket").commit(); @@ -246,10 +243,10 @@ public void testAddAndRemoveField() { .add(2, 1001, "data_bucket_6", Transforms.bucket(6)) .build(), table.spec()); - Assert.assertEquals(1001, table.spec().lastAssignedFieldId()); + assertThat(table.spec().lastAssignedFieldId()).isEqualTo(1001); } - @Test + @TestTemplate public void testAddAfterLastFieldRemoved() { table.updateSpec().removeField("data_bucket").commit(); @@ -268,7 +265,7 @@ public void testAddAfterLastFieldRemoved() { table.spec()); V2Assert.assertEquals( "Should match the last assigned field id", 999, table.spec().lastAssignedFieldId()); - Assert.assertEquals(1000, table.ops().current().lastAssignedPartitionId()); + assertThat(table.ops().current().lastAssignedPartitionId()).isEqualTo(1000); table.updateSpec().addField(bucket("id", 8)).commit(); @@ -287,7 +284,7 @@ public void testAddAfterLastFieldRemoved() { .add(1, 1001, "id_bucket_8", Transforms.bucket(8)) .build(), 
table.spec()); - Assert.assertEquals(1001, table.spec().lastAssignedFieldId()); - Assert.assertEquals(1001, table.ops().current().lastAssignedPartitionId()); + assertThat(table.spec().lastAssignedFieldId()).isEqualTo(1001); + assertThat(table.ops().current().lastAssignedPartitionId()).isEqualTo(1001); } } diff --git a/core/src/test/java/org/apache/iceberg/TestTimestampPartitions.java b/core/src/test/java/org/apache/iceberg/TestTimestampPartitions.java index 7cf993307e3d..08714dec01f1 100644 --- a/core/src/test/java/org/apache/iceberg/TestTimestampPartitions.java +++ b/core/src/test/java/org/apache/iceberg/TestTimestampPartitions.java @@ -20,27 +20,25 @@ import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; import java.io.File; import java.io.IOException; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.List; import org.apache.iceberg.types.Types; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; -@RunWith(Parameterized.class) -public class TestTimestampPartitions extends TableTestBase { - @Parameterized.Parameters(name = "formatVersion = {0}") - public static Object[] parameters() { - return new Object[] {1, 2}; +@ExtendWith(ParameterizedTestExtension.class) +public class TestTimestampPartitions extends TestBase { + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(1, 2); } - public TestTimestampPartitions(int formatVersion) { - super(formatVersion); - } - - @Test + @TestTemplate public void testPartitionAppend() throws IOException { Schema dateSchema = new Schema( @@ -58,8 +56,8 @@ public void testPartitionAppend() throws IOException { .withPartitionPath("date=2018-06-08") .build(); 
- File tableDir = temp.newFolder(); - Assert.assertTrue(tableDir.delete()); + File tableDir = Files.createTempDirectory(temp, "junit").toFile(); + assertThat(tableDir.delete()).isTrue(); this.table = TestTables.create( @@ -67,7 +65,7 @@ public void testPartitionAppend() throws IOException { table.newAppend().appendFile(dataFile).commit(); long id = table.currentSnapshot().snapshotId(); - Assert.assertEquals(table.currentSnapshot().allManifests(table.io()).size(), 1); + assertThat(table.currentSnapshot().allManifests(table.io())).hasSize(1); validateManifestEntries( table.currentSnapshot().allManifests(table.io()).get(0), ids(id), diff --git a/core/src/test/java/org/apache/iceberg/TestUpdatePartitionSpec.java b/core/src/test/java/org/apache/iceberg/TestUpdatePartitionSpec.java index 5ecf138870ad..33b003cfd8c5 100644 --- a/core/src/test/java/org/apache/iceberg/TestUpdatePartitionSpec.java +++ b/core/src/test/java/org/apache/iceberg/TestUpdatePartitionSpec.java @@ -25,18 +25,18 @@ import static org.apache.iceberg.expressions.Expressions.ref; import static org.apache.iceberg.expressions.Expressions.truncate; import static org.apache.iceberg.expressions.Expressions.year; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import java.util.Arrays; +import java.util.List; import org.apache.iceberg.transforms.Transforms; import org.apache.iceberg.types.Types; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) -public class TestUpdatePartitionSpec extends TableTestBase { +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(ParameterizedTestExtension.class) +public class TestUpdatePartitionSpec extends TestBase { private 
static final Schema SCHEMA = new Schema( Types.NestedField.required(1, "id", Types.LongType.get()), @@ -52,76 +52,72 @@ public class TestUpdatePartitionSpec extends TableTestBase { .bucket("id", 16, "shard") .build(); - @Parameterized.Parameters(name = "formatVersion = {0}") - public static Object[] parameters() { - return new Object[] {1, 2}; - } - - public TestUpdatePartitionSpec(int formatVersion) { - super(formatVersion); + @Parameters(name = "formatVersion = {0}") + protected static List parameters() { + return Arrays.asList(1, 2); } - @Test + @TestTemplate public void testAddIdentityByName() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED).addField("category").apply(); PartitionSpec expected = PartitionSpec.builderFor(SCHEMA).identity("category").build(); - Assert.assertEquals("Should match expected spec", expected, updated); + assertThat(updated).isEqualTo(expected); } - @Test + @TestTemplate public void testAddIdentityByTerm() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED).addField(ref("category")).apply(); PartitionSpec expected = PartitionSpec.builderFor(SCHEMA).identity("category").build(); - Assert.assertEquals("Should match expected spec", expected, updated); + assertThat(updated).isEqualTo(expected); } - @Test + @TestTemplate public void testAddYear() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED).addField(year("ts")).apply(); PartitionSpec expected = PartitionSpec.builderFor(SCHEMA).year("ts").build(); - Assert.assertEquals("Should match expected spec", expected, updated); + assertThat(updated).isEqualTo(expected); } - @Test + @TestTemplate public void testAddMonth() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED).addField(month("ts")).apply(); PartitionSpec expected = PartitionSpec.builderFor(SCHEMA).month("ts").build(); - Assert.assertEquals("Should match expected spec", expected, updated); + 
assertThat(updated).isEqualTo(expected); } - @Test + @TestTemplate public void testAddDay() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED).addField(day("ts")).apply(); PartitionSpec expected = PartitionSpec.builderFor(SCHEMA).day("ts").build(); - Assert.assertEquals("Should match expected spec", expected, updated); + assertThat(updated).isEqualTo(expected); } - @Test + @TestTemplate public void testAddHour() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED).addField(hour("ts")).apply(); PartitionSpec expected = PartitionSpec.builderFor(SCHEMA).hour("ts").build(); - Assert.assertEquals("Should match expected spec", expected, updated); + assertThat(updated).isEqualTo(expected); } - @Test + @TestTemplate public void testAddBucket() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED) @@ -132,10 +128,10 @@ public void testAddBucket() { PartitionSpec expected = PartitionSpec.builderFor(SCHEMA).bucket("id", 16, "id_bucket_16").build(); - Assert.assertEquals("Should match expected spec", expected, updated); + assertThat(updated).isEqualTo(expected); } - @Test + @TestTemplate public void testAddTruncate() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED) @@ -146,10 +142,10 @@ public void testAddTruncate() { PartitionSpec expected = PartitionSpec.builderFor(SCHEMA).truncate("data", 4, "data_trunc_4").build(); - Assert.assertEquals("Should match expected spec", expected, updated); + assertThat(updated).isEqualTo(expected); } - @Test + @TestTemplate public void testAddNamedPartition() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED) @@ -158,10 +154,10 @@ public void testAddNamedPartition() { PartitionSpec expected = PartitionSpec.builderFor(SCHEMA).bucket("id", 16, "shard").build(); - Assert.assertEquals("Should match expected spec", expected, updated); + assertThat(updated).isEqualTo(expected); } - @Test + 
@TestTemplate public void testAddToExisting() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) @@ -176,10 +172,10 @@ public void testAddToExisting() { .truncate("data", 4, "data_trunc_4") .build(); - Assert.assertEquals("Should match expected spec", expected, updated); + assertThat(updated).isEqualTo(expected); } - @Test + @TestTemplate public void testMultipleAdds() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED) @@ -197,10 +193,10 @@ public void testMultipleAdds() { .truncate("data", 4, "prefix") .build(); - Assert.assertEquals("Should match expected spec", expected, updated); + assertThat(updated).isEqualTo(expected); } - @Test + @TestTemplate public void testAddHourToDay() { // multiple partitions for the same source with different time granularity is not allowed by the // builder, but is @@ -211,15 +207,13 @@ public void testAddHourToDay() { PartitionSpec byHour = new BaseUpdatePartitionSpec(formatVersion, byDay).addField(hour("ts")).apply(); - Assert.assertEquals( - "Should have a day and an hour time field", - ImmutableList.of( + assertThat(byHour.fields()) + .containsExactly( new PartitionField(2, 1000, "ts_day", Transforms.day()), - new PartitionField(2, 1001, "ts_hour", Transforms.hour())), - byHour.fields()); + new PartitionField(2, 1001, "ts_hour", Transforms.hour())); } - @Test + @TestTemplate public void testAddMultipleBuckets() { PartitionSpec bucket16 = new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED) @@ -235,10 +229,10 @@ public void testAddMultipleBuckets() { .bucket("id", 8, "id_bucket_8") .build(); - Assert.assertEquals("Should have multiple bucket partition fields", expected, bucket8); + assertThat(bucket8).isEqualTo(expected); } - @Test + @TestTemplate public void testRemoveIdentityByName() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED).removeField("category").apply(); @@ -261,7 +255,7 @@ public void testRemoveIdentityByName() { 
V2Assert.assertEquals("Should match expected spec", v2Expected, updated); } - @Test + @TestTemplate public void testRemoveBucketByName() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED).removeField("shard").apply(); @@ -284,7 +278,7 @@ public void testRemoveBucketByName() { V2Assert.assertEquals("Should match expected spec", v2Expected, updated); } - @Test + @TestTemplate public void testRemoveIdentityByEquivalent() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) @@ -309,7 +303,7 @@ public void testRemoveIdentityByEquivalent() { V2Assert.assertEquals("Should match expected spec", v2Expected, updated); } - @Test + @TestTemplate public void testRemoveDayByEquivalent() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED).removeField(day("ts")).apply(); @@ -332,7 +326,7 @@ public void testRemoveDayByEquivalent() { V2Assert.assertEquals("Should match expected spec", v2Expected, updated); } - @Test + @TestTemplate public void testRemoveBucketByEquivalent() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) @@ -354,7 +348,7 @@ public void testRemoveBucketByEquivalent() { V2Assert.assertEquals("Should match expected spec", v2Expected, updated); } - @Test + @TestTemplate public void testRename() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) @@ -364,10 +358,10 @@ public void testRename() { PartitionSpec expected = PartitionSpec.builderFor(SCHEMA).identity("category").day("ts").bucket("id", 16).build(); - Assert.assertEquals("Should match expected spec", expected, updated); + assertThat(updated).isEqualTo(expected); } - @Test + @TestTemplate public void testMultipleChanges() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) @@ -396,7 +390,7 @@ public void testMultipleChanges() { V2Assert.assertEquals("Should match expected spec", v2Expected, updated); } - @Test + @TestTemplate public 
void testAddDeletedName() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) @@ -418,9 +412,9 @@ public void testAddDeletedName() { V2Assert.assertEquals("Should match expected spec", v2Expected, updated); } - @Test + @TestTemplate public void testRemoveNewlyAddedFieldByName() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) .addField("prefix", truncate("data", 4)) @@ -429,9 +423,9 @@ public void testRemoveNewlyAddedFieldByName() { .hasMessageStartingWith("Cannot delete newly added field"); } - @Test + @TestTemplate public void testRemoveNewlyAddedFieldByTransform() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) .addField("prefix", truncate("data", 4)) @@ -440,9 +434,9 @@ public void testRemoveNewlyAddedFieldByTransform() { .hasMessageStartingWith("Cannot delete newly added field"); } - @Test + @TestTemplate public void testAddAlreadyAddedFieldByTransform() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) .addField("prefix", truncate("data", 4)) @@ -451,9 +445,9 @@ public void testAddAlreadyAddedFieldByTransform() { .hasMessageStartingWith("Cannot add duplicate partition field"); } - @Test + @TestTemplate public void testAddAlreadyAddedFieldByName() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) .addField("prefix", truncate("data", 4)) @@ -462,9 +456,9 @@ public void testAddAlreadyAddedFieldByName() { .hasMessageStartingWith("Cannot add duplicate partition field"); } - @Test + @TestTemplate public void testAddRedundantTimePartition() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED) .addField(day("ts")) @@ -472,7 +466,7 @@ public void testAddRedundantTimePartition() { 
.isInstanceOf(IllegalArgumentException.class) .hasMessageStartingWith("Cannot add redundant partition field"); - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) .addField(hour("ts")) // does not conflict with day because day already exists @@ -481,78 +475,66 @@ public void testAddRedundantTimePartition() { .hasMessageStartingWith("Cannot add redundant partition"); } - @Test + @TestTemplate public void testNoEffectAddDeletedSameFieldWithSameName() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) .removeField("shard") .addField("shard", bucket("id", 16)) .apply(); - Assert.assertEquals(PARTITIONED, updated); + assertThat(updated).isEqualTo(PARTITIONED); updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) .removeField("shard") .addField(bucket("id", 16)) .apply(); - Assert.assertEquals(PARTITIONED, updated); + assertThat(updated).isEqualTo(PARTITIONED); } - @Test + @TestTemplate public void testGenerateNewSpecAddDeletedSameFieldWithDifferentName() { PartitionSpec updated = new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) .removeField("shard") .addField("new_shard", bucket("id", 16)) .apply(); - Assert.assertEquals("Should match expected field size", 3, updated.fields().size()); - Assert.assertEquals( - "Should match expected field name", "category", updated.fields().get(0).name()); - Assert.assertEquals( - "Should match expected field name", "ts_day", updated.fields().get(1).name()); - Assert.assertEquals( - "Should match expected field name", "new_shard", updated.fields().get(2).name()); - Assert.assertEquals( - "Should match expected field transform", - "identity", - updated.fields().get(0).transform().toString()); - Assert.assertEquals( - "Should match expected field transform", - "day", - updated.fields().get(1).transform().toString()); - Assert.assertEquals( - "Should match expected field transform", - "bucket[16]", - 
updated.fields().get(2).transform().toString()); - } - - @Test + assertThat(updated.fields()).hasSize(3); + assertThat(updated.fields().get(0).name()).isEqualTo("category"); + assertThat(updated.fields().get(1).name()).isEqualTo("ts_day"); + assertThat(updated.fields().get(2).name()).isEqualTo("new_shard"); + assertThat(updated.fields().get(0).transform()).asString().isEqualTo("identity"); + assertThat(updated.fields().get(1).transform()).asString().isEqualTo("day"); + assertThat(updated.fields().get(2).transform()).asString().isEqualTo("bucket[16]"); + } + + @TestTemplate public void testAddDuplicateByName() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED).addField("category")) .isInstanceOf(IllegalArgumentException.class) .hasMessageStartingWith("Cannot add duplicate partition field"); } - @Test + @TestTemplate public void testAddDuplicateByRef() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED).addField(ref("category"))) .isInstanceOf(IllegalArgumentException.class) .hasMessageStartingWith("Cannot add duplicate partition field"); } - @Test + @TestTemplate public void testAddDuplicateTransform() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED).addField(bucket("id", 16))) .isInstanceOf(IllegalArgumentException.class) .hasMessageStartingWith("Cannot add duplicate partition field"); } - @Test + @TestTemplate public void testAddNamedDuplicate() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) .addField("b16", bucket("id", 16))) @@ -560,17 +542,17 @@ public void testAddNamedDuplicate() { .hasMessageStartingWith("Cannot add duplicate partition field"); } - @Test + @TestTemplate public void testRemoveUnknownFieldByName() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new 
BaseUpdatePartitionSpec(formatVersion, PARTITIONED).removeField("moon")) .isInstanceOf(IllegalArgumentException.class) .hasMessageStartingWith("Cannot find partition field to remove"); } - @Test + @TestTemplate public void testRemoveUnknownFieldByEquivalent() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) .removeField(hour("ts")) // day(ts) exists @@ -579,9 +561,9 @@ public void testRemoveUnknownFieldByEquivalent() { .hasMessageStartingWith("Cannot find partition field to remove"); } - @Test + @TestTemplate public void testRenameUnknownField() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) .renameField("shake", "seal")) @@ -589,9 +571,9 @@ public void testRenameUnknownField() { .hasMessage("Cannot find partition field to rename: shake"); } - @Test + @TestTemplate public void testRenameAfterAdd() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) .addField("data_trunc", truncate("data", 4)) @@ -600,9 +582,9 @@ public void testRenameAfterAdd() { .hasMessage("Cannot rename newly added partition field: data_trunc"); } - @Test + @TestTemplate public void testRenameAndDelete() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) .renameField("shard", "id_bucket") @@ -611,9 +593,9 @@ public void testRenameAndDelete() { .hasMessage("Cannot rename and delete partition field: shard"); } - @Test + @TestTemplate public void testDeleteAndRename() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> new BaseUpdatePartitionSpec(formatVersion, PARTITIONED) .removeField(bucket("id", 16)) @@ -622,7 +604,7 @@ public void testDeleteAndRename() { .hasMessage("Cannot delete and rename partition field: shard"); } - @Test + @TestTemplate public void testRemoveAndAddMultiTimes() { PartitionSpec addFirstTime 
= new BaseUpdatePartitionSpec(formatVersion, UNPARTITIONED) @@ -644,29 +626,15 @@ public void testRemoveAndAddMultiTimes() { .apply(); if (formatVersion == 1) { - Assert.assertEquals("Should match expected spec field size", 3, updated.fields().size()); - - Assert.assertTrue( - "Should match expected field name", - updated.fields().get(0).name().matches("^ts_date(?:_\\d+)+$")); - Assert.assertTrue( - "Should match expected field name", - updated.fields().get(1).name().matches("^ts_date_(?:\\d+)+$")); - Assert.assertEquals( - "Should match expected field name", "ts_date", updated.fields().get(2).name()); - - Assert.assertEquals( - "Should match expected field transform", - "void", - updated.fields().get(0).transform().toString()); - Assert.assertEquals( - "Should match expected field transform", - "void", - updated.fields().get(1).transform().toString()); - Assert.assertEquals( - "Should match expected field transform", - "month", - updated.fields().get(2).transform().toString()); + assertThat(updated.fields()).hasSize(3); + + assertThat(updated.fields().get(0).name()).matches("^ts_date(?:_\\d+)+$"); + assertThat(updated.fields().get(1).name()).matches("^ts_date(?:_\\d+)+$"); + assertThat(updated.fields().get(2).name()).isEqualTo("ts_date"); + + assertThat(updated.fields().get(0).transform()).asString().isEqualTo("void"); + assertThat(updated.fields().get(1).transform()).asString().isEqualTo("void"); + assertThat(updated.fields().get(2).transform()).asString().isEqualTo("month"); } PartitionSpec v2Expected = PartitionSpec.builderFor(SCHEMA).month("ts", "ts_date").build(); @@ -674,7 +642,7 @@ public void testRemoveAndAddMultiTimes() { V2Assert.assertEquals("Should match expected spec", v2Expected, updated); } - @Test + @TestTemplate public void testRemoveAndUpdateWithDifferentTransformation() { PartitionSpec expected = PartitionSpec.builderFor(SCHEMA).month("ts", "ts_transformed").build(); PartitionSpec updated = @@ -684,30 +652,16 @@ public void 
testRemoveAndUpdateWithDifferentTransformation() { .apply(); if (formatVersion == 1) { - Assert.assertEquals("Should match expected spec field size", 2, updated.fields().size()); - Assert.assertEquals( - "Should match expected field name", - "ts_transformed_1000", - updated.fields().get(0).name()); - Assert.assertEquals( - "Should match expected field name", "ts_transformed", updated.fields().get(1).name()); - - Assert.assertEquals( - "Should match expected field transform", - "void", - updated.fields().get(0).transform().toString()); - Assert.assertEquals( - "Should match expected field transform", - "day", - updated.fields().get(1).transform().toString()); + assertThat(updated.fields()).hasSize(2); + assertThat(updated.fields().get(0).name()).isEqualTo("ts_transformed_1000"); + assertThat(updated.fields().get(1).name()).isEqualTo("ts_transformed"); + + assertThat(updated.fields().get(0).transform()).asString().isEqualTo("void"); + assertThat(updated.fields().get(1).transform()).asString().isEqualTo("day"); } else { - Assert.assertEquals("Should match expected spec field size", 1, updated.fields().size()); - Assert.assertEquals( - "Should match expected field name", "ts_transformed", updated.fields().get(0).name()); - Assert.assertEquals( - "Should match expected field transform", - "day", - updated.fields().get(0).transform().toString()); + assertThat(updated.fields()).hasSize(1); + assertThat(updated.fields().get(0).name()).isEqualTo("ts_transformed"); + assertThat(updated.fields().get(0).transform()).asString().isEqualTo("day"); } } From e769addf75400e9dffb72e7e417bc752f804622a Mon Sep 17 00:00:00 2001 From: Bryan Keller Date: Thu, 21 Mar 2024 14:08:16 -0700 Subject: [PATCH 21/23] Kafka Connect: Record converters (#9641) --- .../connect/events/TableReference.java | 7 +- .../iceberg/connect/IcebergSinkConfig.java | 6 +- .../iceberg/connect/IcebergSinkConnector.java | 5 +- .../iceberg/connect/data/IcebergWriter.java | 32 +- 
.../iceberg/connect/data/NoOpWriter.java | 2 +- .../connect/data/PartitionedAppendWriter.java | 4 +- .../iceberg/connect/data/RecordConverter.java | 517 ++++++++++ .../iceberg/connect/data/SchemaUpdate.java | 44 +- .../iceberg/connect/data/SchemaUtils.java | 14 +- .../iceberg/connect/data/Utilities.java | 20 +- .../connect/IcebergSinkConnectorTest.java | 4 +- .../connect/data/RecordConverterTest.java | 936 ++++++++++++++++++ 12 files changed, 1526 insertions(+), 65 deletions(-) create mode 100644 kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/RecordConverter.java create mode 100644 kafka-connect/kafka-connect/src/test/java/org/apache/iceberg/connect/data/RecordConverterTest.java diff --git a/kafka-connect/kafka-connect-events/src/main/java/org/apache/iceberg/connect/events/TableReference.java b/kafka-connect/kafka-connect-events/src/main/java/org/apache/iceberg/connect/events/TableReference.java index d1400f58f74c..50eaa1050485 100644 --- a/kafka-connect/kafka-connect-events/src/main/java/org/apache/iceberg/connect/events/TableReference.java +++ b/kafka-connect/kafka-connect-events/src/main/java/org/apache/iceberg/connect/events/TableReference.java @@ -18,10 +18,9 @@ */ package org.apache.iceberg.connect.events; -import static java.util.stream.Collectors.toList; - import java.util.Arrays; import java.util.List; +import java.util.stream.Collectors; import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; import org.apache.avro.util.Utf8; @@ -96,7 +95,9 @@ public void put(int i, Object v) { return; case NAMESPACE: this.namespace = - v == null ? null : ((List) v).stream().map(Utf8::toString).collect(toList()); + v == null + ? null + : ((List) v).stream().map(Utf8::toString).collect(Collectors.toList()); return; case NAME: this.name = v == null ? 
null : v.toString(); diff --git a/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/IcebergSinkConfig.java b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/IcebergSinkConfig.java index aa1ecdd5d1ba..d1572fbff37b 100644 --- a/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/IcebergSinkConfig.java +++ b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/IcebergSinkConfig.java @@ -18,8 +18,6 @@ */ package org.apache.iceberg.connect; -import static java.util.stream.Collectors.toList; - import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Paths; @@ -28,6 +26,7 @@ import java.util.Map; import java.util.Properties; import java.util.regex.Pattern; +import java.util.stream.Collectors; import org.apache.iceberg.IcebergBuild; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; @@ -72,7 +71,6 @@ public class IcebergSinkConfig extends AbstractConfig { private static final String TABLES_DEFAULT_COMMIT_BRANCH = "iceberg.tables.default-commit-branch"; private static final String TABLES_DEFAULT_ID_COLUMNS = "iceberg.tables.default-id-columns"; private static final String TABLES_DEFAULT_PARTITION_BY = "iceberg.tables.default-partition-by"; - // FIXME: add config for CDC and upsert mode private static final String TABLES_AUTO_CREATE_ENABLED_PROP = "iceberg.tables.auto-create-enabled"; private static final String TABLES_EVOLVE_SCHEMA_ENABLED_PROP = @@ -365,7 +363,7 @@ static List stringToList(String value, String regex) { return ImmutableList.of(); } - return Arrays.stream(value.split(regex)).map(String::trim).collect(toList()); + return Arrays.stream(value.split(regex)).map(String::trim).collect(Collectors.toList()); } public String controlTopic() { diff --git a/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/IcebergSinkConnector.java 
b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/IcebergSinkConnector.java index 485b209302d5..8be8518f4407 100644 --- a/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/IcebergSinkConnector.java +++ b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/IcebergSinkConnector.java @@ -18,11 +18,10 @@ */ package org.apache.iceberg.connect; -import static java.util.stream.Collectors.toList; - import java.util.List; import java.util.Map; import java.util.UUID; +import java.util.stream.Collectors; import java.util.stream.IntStream; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.kafka.common.config.ConfigDef; @@ -60,7 +59,7 @@ public List> taskConfigs(int maxTasks) { map.put(IcebergSinkConfig.INTERNAL_TRANSACTIONAL_SUFFIX_PROP, txnSuffix + i); return map; }) - .collect(toList()); + .collect(Collectors.toList()); } @Override diff --git a/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/IcebergWriter.java b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/IcebergWriter.java index da88b3b50ffe..27ffc4de9973 100644 --- a/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/IcebergWriter.java +++ b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/IcebergWriter.java @@ -38,8 +38,7 @@ public class IcebergWriter implements RecordWriter { private final IcebergSinkConfig config; private final List writerResults; - // FIXME: update this when the record converter is added - // private RecordConverter recordConverter; + private RecordConverter recordConverter; private TaskWriter writer; public IcebergWriter(Table table, String tableName, IcebergSinkConfig config) { @@ -52,19 +51,15 @@ public IcebergWriter(Table table, String tableName, IcebergSinkConfig config) { private void initNewWriter() { this.writer = Utilities.createTableWriter(table, tableName, config); - // FIXME: update this when the 
record converter is added - // this.recordConverter = new RecordConverter(table, config); + this.recordConverter = new RecordConverter(table, config); } @Override public void write(SinkRecord record) { try { - // TODO: config to handle tombstones instead of always ignoring? + // ignore tombstones... if (record.value() != null) { Record row = convertToRow(record); - - // FIXME: add CDC operation support - writer.write(row); } } catch (Exception e) { @@ -77,8 +72,25 @@ public void write(SinkRecord record) { } private Record convertToRow(SinkRecord record) { - // FIXME: update this when the record converter is added - return null; + if (!config.evolveSchemaEnabled()) { + return recordConverter.convert(record.value()); + } + + SchemaUpdate.Consumer updates = new SchemaUpdate.Consumer(); + Record row = recordConverter.convert(record.value(), updates); + + if (!updates.empty()) { + // complete the current file + flush(); + // apply the schema updates, this will refresh the table + SchemaUtils.applySchemaUpdates(table, updates); + // initialize a new writer with the new schema + initNewWriter(); + // convert the row again, this time using the new table schema + row = recordConverter.convert(record.value(), null); + } + + return row; } private void flush() { diff --git a/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/NoOpWriter.java b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/NoOpWriter.java index 31abe09cf1a4..64ca44f03209 100644 --- a/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/NoOpWriter.java +++ b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/NoOpWriter.java @@ -21,7 +21,7 @@ import java.util.List; import org.apache.kafka.connect.sink.SinkRecord; -public class NoOpWriter implements RecordWriter { +class NoOpWriter implements RecordWriter { @Override public void write(SinkRecord record) { // NO-OP diff --git 
a/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/PartitionedAppendWriter.java b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/PartitionedAppendWriter.java index 1d429e44e675..ad8b5715a99b 100644 --- a/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/PartitionedAppendWriter.java +++ b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/PartitionedAppendWriter.java @@ -29,12 +29,12 @@ import org.apache.iceberg.io.OutputFileFactory; import org.apache.iceberg.io.PartitionedFanoutWriter; -public class PartitionedAppendWriter extends PartitionedFanoutWriter { +class PartitionedAppendWriter extends PartitionedFanoutWriter { private final PartitionKey partitionKey; private final InternalRecordWrapper wrapper; - public PartitionedAppendWriter( + PartitionedAppendWriter( PartitionSpec spec, FileFormat format, FileAppenderFactory appenderFactory, diff --git a/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/RecordConverter.java b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/RecordConverter.java new file mode 100644 index 000000000000..406a2cba4526 --- /dev/null +++ b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/RecordConverter.java @@ -0,0 +1,517 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.connect.data; + +import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.math.BigDecimal; +import java.math.RoundingMode; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.OffsetDateTime; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.time.format.DateTimeParseException; +import java.time.temporal.Temporal; +import java.util.Base64; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.stream.Collectors; +import org.apache.iceberg.Schema; +import org.apache.iceberg.Table; +import org.apache.iceberg.TableProperties; +import org.apache.iceberg.connect.IcebergSinkConfig; +import org.apache.iceberg.data.GenericRecord; +import org.apache.iceberg.data.Record; +import org.apache.iceberg.mapping.MappedField; +import org.apache.iceberg.mapping.NameMapping; +import org.apache.iceberg.mapping.NameMappingParser; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Type.PrimitiveType; +import org.apache.iceberg.types.Types.DecimalType; +import org.apache.iceberg.types.Types.ListType; +import 
org.apache.iceberg.types.Types.MapType; +import org.apache.iceberg.types.Types.NestedField; +import org.apache.iceberg.types.Types.StructType; +import org.apache.iceberg.types.Types.TimestampType; +import org.apache.iceberg.util.DateTimeUtil; +import org.apache.kafka.connect.data.Struct; + +class RecordConverter { + + private static final ObjectMapper MAPPER = new ObjectMapper(); + + private static final DateTimeFormatter OFFSET_TIMESTAMP_FORMAT = + new DateTimeFormatterBuilder() + .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME) + .appendOffset("+HHmm", "Z") + .toFormatter(); + + private final Schema tableSchema; + private final NameMapping nameMapping; + private final IcebergSinkConfig config; + private final Map> structNameMap = Maps.newHashMap(); + + RecordConverter(Table table, IcebergSinkConfig config) { + this.tableSchema = table.schema(); + this.nameMapping = createNameMapping(table); + this.config = config; + } + + Record convert(Object data) { + return convert(data, null); + } + + Record convert(Object data, SchemaUpdate.Consumer schemaUpdateConsumer) { + if (data instanceof Struct || data instanceof Map) { + return convertStructValue(data, tableSchema.asStruct(), -1, schemaUpdateConsumer); + } + throw new UnsupportedOperationException("Cannot convert type: " + data.getClass().getName()); + } + + private NameMapping createNameMapping(Table table) { + String nameMappingString = table.properties().get(TableProperties.DEFAULT_NAME_MAPPING); + return nameMappingString != null ? 
NameMappingParser.fromJson(nameMappingString) : null; + } + + private Object convertValue( + Object value, Type type, int fieldId, SchemaUpdate.Consumer schemaUpdateConsumer) { + if (value == null) { + return null; + } + switch (type.typeId()) { + case STRUCT: + return convertStructValue(value, type.asStructType(), fieldId, schemaUpdateConsumer); + case LIST: + return convertListValue(value, type.asListType(), schemaUpdateConsumer); + case MAP: + return convertMapValue(value, type.asMapType(), schemaUpdateConsumer); + case INTEGER: + return convertInt(value); + case LONG: + return convertLong(value); + case FLOAT: + return convertFloat(value); + case DOUBLE: + return convertDouble(value); + case DECIMAL: + return convertDecimal(value, (DecimalType) type); + case BOOLEAN: + return convertBoolean(value); + case STRING: + return convertString(value); + case UUID: + return convertUUID(value); + case BINARY: + case FIXED: + return convertBase64Binary(value); + case DATE: + return convertDateValue(value); + case TIME: + return convertTimeValue(value); + case TIMESTAMP: + return convertTimestampValue(value, (TimestampType) type); + } + throw new UnsupportedOperationException("Unsupported type: " + type.typeId()); + } + + protected GenericRecord convertStructValue( + Object value, + StructType schema, + int parentFieldId, + SchemaUpdate.Consumer schemaUpdateConsumer) { + if (value instanceof Map) { + return convertToStruct((Map) value, schema, parentFieldId, schemaUpdateConsumer); + } else if (value instanceof Struct) { + return convertToStruct((Struct) value, schema, parentFieldId, schemaUpdateConsumer); + } + throw new IllegalArgumentException("Cannot convert to struct: " + value.getClass().getName()); + } + + /** + * This method will be called for records when there is no record schema. Also, when there is no + * schema, we infer that map values are struct types. This method might also be called if the + * field value is a map but the Iceberg type is a struct. 
This can happen if the Iceberg table + * schema is not managed by the sink, i.e. created manually. + */ + private GenericRecord convertToStruct( + Map map, + StructType schema, + int structFieldId, + SchemaUpdate.Consumer schemaUpdateConsumer) { + GenericRecord result = GenericRecord.create(schema); + map.forEach( + (recordFieldNameObj, recordFieldValue) -> { + String recordFieldName = recordFieldNameObj.toString(); + NestedField tableField = lookupStructField(recordFieldName, schema, structFieldId); + if (tableField == null) { + // add the column if schema evolution is on, otherwise skip the value, + // skip the add column if we can't infer the type + if (schemaUpdateConsumer != null) { + Type type = SchemaUtils.inferIcebergType(recordFieldValue, config); + if (type != null) { + String parentFieldName = + structFieldId < 0 ? null : tableSchema.findColumnName(structFieldId); + schemaUpdateConsumer.addColumn(parentFieldName, recordFieldName, type); + } + } + } else { + result.setField( + tableField.name(), + convertValue( + recordFieldValue, + tableField.type(), + tableField.fieldId(), + schemaUpdateConsumer)); + } + }); + return result; + } + + /** This method will be called for records and struct values when there is a record schema. */ + private GenericRecord convertToStruct( + Struct struct, + StructType schema, + int structFieldId, + SchemaUpdate.Consumer schemaUpdateConsumer) { + GenericRecord result = GenericRecord.create(schema); + struct + .schema() + .fields() + .forEach( + recordField -> { + NestedField tableField = lookupStructField(recordField.name(), schema, structFieldId); + if (tableField == null) { + // add the column if schema evolution is on, otherwise skip the value + if (schemaUpdateConsumer != null) { + String parentFieldName = + structFieldId < 0 ? 
null : tableSchema.findColumnName(structFieldId); + Type type = SchemaUtils.toIcebergType(recordField.schema(), config); + schemaUpdateConsumer.addColumn(parentFieldName, recordField.name(), type); + } + } else { + boolean hasSchemaUpdates = false; + if (schemaUpdateConsumer != null) { + // update the type if needed and schema evolution is on + PrimitiveType evolveDataType = + SchemaUtils.needsDataTypeUpdate(tableField.type(), recordField.schema()); + if (evolveDataType != null) { + String fieldName = tableSchema.findColumnName(tableField.fieldId()); + schemaUpdateConsumer.updateType(fieldName, evolveDataType); + hasSchemaUpdates = true; + } + // make optional if needed and schema evolution is on + if (tableField.isRequired() && recordField.schema().isOptional()) { + String fieldName = tableSchema.findColumnName(tableField.fieldId()); + schemaUpdateConsumer.makeOptional(fieldName); + hasSchemaUpdates = true; + } + } + if (!hasSchemaUpdates) { + result.setField( + tableField.name(), + convertValue( + struct.get(recordField), + tableField.type(), + tableField.fieldId(), + schemaUpdateConsumer)); + } + } + }); + return result; + } + + private NestedField lookupStructField(String fieldName, StructType schema, int structFieldId) { + if (nameMapping == null) { + return config.schemaCaseInsensitive() + ? 
schema.caseInsensitiveField(fieldName) + : schema.field(fieldName); + } + + return structNameMap + .computeIfAbsent(structFieldId, notUsed -> createStructNameMap(schema)) + .get(fieldName); + } + + private Map createStructNameMap(StructType schema) { + Map map = Maps.newHashMap(); + schema + .fields() + .forEach( + col -> { + MappedField mappedField = nameMapping.find(col.fieldId()); + if (mappedField != null && !mappedField.names().isEmpty()) { + mappedField.names().forEach(name -> map.put(name, col)); + } else { + map.put(col.name(), col); + } + }); + return map; + } + + protected List convertListValue( + Object value, ListType type, SchemaUpdate.Consumer schemaUpdateConsumer) { + Preconditions.checkArgument(value instanceof List); + List list = (List) value; + return list.stream() + .map( + element -> { + int fieldId = type.fields().get(0).fieldId(); + return convertValue(element, type.elementType(), fieldId, schemaUpdateConsumer); + }) + .collect(Collectors.toList()); + } + + protected Map convertMapValue( + Object value, MapType type, SchemaUpdate.Consumer schemaUpdateConsumer) { + Preconditions.checkArgument(value instanceof Map); + Map map = (Map) value; + Map result = Maps.newHashMap(); + map.forEach( + (k, v) -> { + int keyFieldId = type.fields().get(0).fieldId(); + int valueFieldId = type.fields().get(1).fieldId(); + result.put( + convertValue(k, type.keyType(), keyFieldId, schemaUpdateConsumer), + convertValue(v, type.valueType(), valueFieldId, schemaUpdateConsumer)); + }); + return result; + } + + protected int convertInt(Object value) { + if (value instanceof Number) { + return ((Number) value).intValue(); + } else if (value instanceof String) { + return Integer.parseInt((String) value); + } + throw new IllegalArgumentException("Cannot convert to int: " + value.getClass().getName()); + } + + protected long convertLong(Object value) { + if (value instanceof Number) { + return ((Number) value).longValue(); + } else if (value instanceof String) { + return 
Long.parseLong((String) value); + } + throw new IllegalArgumentException("Cannot convert to long: " + value.getClass().getName()); + } + + protected float convertFloat(Object value) { + if (value instanceof Number) { + return ((Number) value).floatValue(); + } else if (value instanceof String) { + return Float.parseFloat((String) value); + } + throw new IllegalArgumentException("Cannot convert to float: " + value.getClass().getName()); + } + + protected double convertDouble(Object value) { + if (value instanceof Number) { + return ((Number) value).doubleValue(); + } else if (value instanceof String) { + return Double.parseDouble((String) value); + } + throw new IllegalArgumentException("Cannot convert to double: " + value.getClass().getName()); + } + + protected BigDecimal convertDecimal(Object value, DecimalType type) { + BigDecimal bigDecimal; + if (value instanceof BigDecimal) { + bigDecimal = (BigDecimal) value; + } else if (value instanceof Number) { + Number num = (Number) value; + Double dbl = num.doubleValue(); + if (dbl.equals(Math.floor(dbl))) { + bigDecimal = BigDecimal.valueOf(num.longValue()); + } else { + bigDecimal = BigDecimal.valueOf(dbl); + } + } else if (value instanceof String) { + bigDecimal = new BigDecimal((String) value); + } else { + throw new IllegalArgumentException( + "Cannot convert to BigDecimal: " + value.getClass().getName()); + } + return bigDecimal.setScale(type.scale(), RoundingMode.HALF_UP); + } + + protected boolean convertBoolean(Object value) { + if (value instanceof Boolean) { + return (boolean) value; + } else if (value instanceof String) { + return Boolean.parseBoolean((String) value); + } + throw new IllegalArgumentException("Cannot convert to boolean: " + value.getClass().getName()); + } + + protected String convertString(Object value) { + try { + if (value instanceof String) { + return (String) value; + } else if (value instanceof Number || value instanceof Boolean) { + return value.toString(); + } else if (value 
instanceof Map || value instanceof List) { + return MAPPER.writeValueAsString(value); + } else if (value instanceof Struct) { + Struct struct = (Struct) value; + byte[] data = config.jsonConverter().fromConnectData(null, struct.schema(), struct); + return new String(data, StandardCharsets.UTF_8); + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } + throw new IllegalArgumentException("Cannot convert to string: " + value.getClass().getName()); + } + + protected UUID convertUUID(Object value) { + if (value instanceof String) { + return UUID.fromString((String) value); + } else if (value instanceof UUID) { + return (UUID) value; + } + throw new IllegalArgumentException("Cannot convert to UUID: " + value.getClass().getName()); + } + + protected ByteBuffer convertBase64Binary(Object value) { + if (value instanceof String) { + return ByteBuffer.wrap(Base64.getDecoder().decode((String) value)); + } else if (value instanceof byte[]) { + return ByteBuffer.wrap((byte[]) value); + } else if (value instanceof ByteBuffer) { + return (ByteBuffer) value; + } + throw new IllegalArgumentException("Cannot convert to binary: " + value.getClass().getName()); + } + + @SuppressWarnings("JavaUtilDate") + protected LocalDate convertDateValue(Object value) { + if (value instanceof Number) { + int days = ((Number) value).intValue(); + return DateTimeUtil.dateFromDays(days); + } else if (value instanceof String) { + return LocalDate.parse((String) value); + } else if (value instanceof LocalDate) { + return (LocalDate) value; + } else if (value instanceof Date) { + int days = (int) (((Date) value).getTime() / 1000 / 60 / 60 / 24); + return DateTimeUtil.dateFromDays(days); + } + throw new RuntimeException("Cannot convert date: " + value); + } + + @SuppressWarnings("JavaUtilDate") + protected LocalTime convertTimeValue(Object value) { + if (value instanceof Number) { + long millis = ((Number) value).longValue(); + return DateTimeUtil.timeFromMicros(millis * 1000); + } else 
if (value instanceof String) { + return LocalTime.parse((String) value); + } else if (value instanceof LocalTime) { + return (LocalTime) value; + } else if (value instanceof Date) { + long millis = ((Date) value).getTime(); + return DateTimeUtil.timeFromMicros(millis * 1000); + } + throw new RuntimeException("Cannot convert time: " + value); + } + + protected Temporal convertTimestampValue(Object value, TimestampType type) { + if (type.shouldAdjustToUTC()) { + return convertOffsetDateTime(value); + } + return convertLocalDateTime(value); + } + + @SuppressWarnings("JavaUtilDate") + private OffsetDateTime convertOffsetDateTime(Object value) { + if (value instanceof Number) { + long millis = ((Number) value).longValue(); + return DateTimeUtil.timestamptzFromMicros(millis * 1000); + } else if (value instanceof String) { + return parseOffsetDateTime((String) value); + } else if (value instanceof OffsetDateTime) { + return (OffsetDateTime) value; + } else if (value instanceof LocalDateTime) { + return ((LocalDateTime) value).atOffset(ZoneOffset.UTC); + } else if (value instanceof Date) { + return DateTimeUtil.timestamptzFromMicros(((Date) value).getTime() * 1000); + } + throw new RuntimeException( + "Cannot convert timestamptz: " + value + ", type: " + value.getClass()); + } + + private OffsetDateTime parseOffsetDateTime(String str) { + String tsStr = ensureTimestampFormat(str); + try { + return OFFSET_TIMESTAMP_FORMAT.parse(tsStr, OffsetDateTime::from); + } catch (DateTimeParseException e) { + return LocalDateTime.parse(tsStr, DateTimeFormatter.ISO_LOCAL_DATE_TIME) + .atOffset(ZoneOffset.UTC); + } + } + + @SuppressWarnings("JavaUtilDate") + private LocalDateTime convertLocalDateTime(Object value) { + if (value instanceof Number) { + long millis = ((Number) value).longValue(); + return DateTimeUtil.timestampFromMicros(millis * 1000); + } else if (value instanceof String) { + return parseLocalDateTime((String) value); + } else if (value instanceof LocalDateTime) { + 
return (LocalDateTime) value; + } else if (value instanceof OffsetDateTime) { + return ((OffsetDateTime) value).toLocalDateTime(); + } else if (value instanceof Date) { + return DateTimeUtil.timestampFromMicros(((Date) value).getTime() * 1000); + } + throw new RuntimeException( + "Cannot convert timestamp: " + value + ", type: " + value.getClass()); + } + + private LocalDateTime parseLocalDateTime(String str) { + String tsStr = ensureTimestampFormat(str); + try { + return LocalDateTime.parse(tsStr, DateTimeFormatter.ISO_LOCAL_DATE_TIME); + } catch (DateTimeParseException e) { + return OFFSET_TIMESTAMP_FORMAT.parse(tsStr, OffsetDateTime::from).toLocalDateTime(); + } + } + + private String ensureTimestampFormat(String str) { + String result = str; + if (result.charAt(10) == ' ') { + result = result.substring(0, 10) + 'T' + result.substring(11); + } + if (result.length() > 22 + && (result.charAt(19) == '+' || result.charAt(19) == '-') + && result.charAt(22) == ':') { + result = result.substring(0, 19) + result.substring(19).replace(":", ""); + } + return result; + } +} diff --git a/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/SchemaUpdate.java b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/SchemaUpdate.java index 2bb0e65f204b..809bea84dcc2 100644 --- a/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/SchemaUpdate.java +++ b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/SchemaUpdate.java @@ -24,97 +24,97 @@ import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Type.PrimitiveType; -public class SchemaUpdate { +class SchemaUpdate { - public static class Consumer { + static class Consumer { private final Map addColumns = Maps.newHashMap(); private final Map updateTypes = Maps.newHashMap(); private final Map makeOptionals = Maps.newHashMap(); - public Collection addColumns() { + Collection addColumns() { return addColumns.values(); } - public 
Collection updateTypes() { + Collection updateTypes() { return updateTypes.values(); } - public Collection makeOptionals() { + Collection makeOptionals() { return makeOptionals.values(); } - public boolean empty() { + boolean empty() { return addColumns.isEmpty() && updateTypes.isEmpty() && makeOptionals.isEmpty(); } - public void addColumn(String parentName, String name, Type type) { + void addColumn(String parentName, String name, Type type) { AddColumn addCol = new AddColumn(parentName, name, type); addColumns.put(addCol.key(), addCol); } - public void updateType(String name, PrimitiveType type) { + void updateType(String name, PrimitiveType type) { updateTypes.put(name, new UpdateType(name, type)); } - public void makeOptional(String name) { + void makeOptional(String name) { makeOptionals.put(name, new MakeOptional(name)); } } - public static class AddColumn extends SchemaUpdate { + static class AddColumn extends SchemaUpdate { private final String parentName; private final String name; private final Type type; - public AddColumn(String parentName, String name, Type type) { + AddColumn(String parentName, String name, Type type) { this.parentName = parentName; this.name = name; this.type = type; } - public String parentName() { + String parentName() { return parentName; } - public String name() { + String name() { return name; } - public String key() { + String key() { return parentName == null ? name : parentName + "." 
+ name; } - public Type type() { + Type type() { return type; } } - public static class UpdateType extends SchemaUpdate { + static class UpdateType extends SchemaUpdate { private final String name; private final PrimitiveType type; - public UpdateType(String name, PrimitiveType type) { + UpdateType(String name, PrimitiveType type) { this.name = name; this.type = type; } - public String name() { + String name() { return name; } - public PrimitiveType type() { + PrimitiveType type() { return type; } } - public static class MakeOptional extends SchemaUpdate { + static class MakeOptional extends SchemaUpdate { private final String name; - public MakeOptional(String name) { + MakeOptional(String name) { this.name = name; } - public String name() { + String name() { return name; } } diff --git a/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/SchemaUtils.java b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/SchemaUtils.java index 64fa89041c29..a2e0729fd506 100644 --- a/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/SchemaUtils.java +++ b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/SchemaUtils.java @@ -65,13 +65,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class SchemaUtils { +class SchemaUtils { private static final Logger LOG = LoggerFactory.getLogger(SchemaUtils.class); private static final Pattern TRANSFORM_REGEX = Pattern.compile("(\\w+)\\((.+)\\)"); - public static PrimitiveType needsDataTypeUpdate(Type currentIcebergType, Schema valueSchema) { + static PrimitiveType needsDataTypeUpdate(Type currentIcebergType, Schema valueSchema) { if (currentIcebergType.typeId() == TypeID.FLOAT && valueSchema.type() == Schema.Type.FLOAT64) { return DoubleType.get(); } @@ -81,7 +81,7 @@ public static PrimitiveType needsDataTypeUpdate(Type currentIcebergType, Schema return null; } - public static void applySchemaUpdates(Table table, 
SchemaUpdate.Consumer updates) { + static void applySchemaUpdates(Table table, SchemaUpdate.Consumer updates) { if (updates == null || updates.empty()) { // no updates to apply return; @@ -150,7 +150,7 @@ private static boolean isOptional(org.apache.iceberg.Schema schema, MakeOptional return field.isOptional(); } - public static PartitionSpec createPartitionSpec( + static PartitionSpec createPartitionSpec( org.apache.iceberg.Schema schema, List partitionBy) { if (partitionBy.isEmpty()) { return PartitionSpec.unpartitioned(); @@ -209,11 +209,11 @@ private static Pair transformArgPair(String argsStr) { return Pair.of(parts.get(0).trim(), Integer.parseInt(parts.get(1).trim())); } - public static Type toIcebergType(Schema valueSchema, IcebergSinkConfig config) { + static Type toIcebergType(Schema valueSchema, IcebergSinkConfig config) { return new SchemaGenerator(config).toIcebergType(valueSchema); } - public static Type inferIcebergType(Object value, IcebergSinkConfig config) { + static Type inferIcebergType(Object value, IcebergSinkConfig config) { return new SchemaGenerator(config).inferIcebergType(value); } @@ -290,7 +290,7 @@ Type toIcebergType(Schema valueSchema) { } @SuppressWarnings("checkstyle:CyclomaticComplexity") - public Type inferIcebergType(Object value) { + Type inferIcebergType(Object value) { if (value == null) { return null; } else if (value instanceof String) { diff --git a/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/Utilities.java b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/Utilities.java index ec13b003a21a..4ff83f777527 100644 --- a/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/Utilities.java +++ b/kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/data/Utilities.java @@ -18,12 +18,6 @@ */ package org.apache.iceberg.connect.data; -import static java.util.stream.Collectors.toSet; -import static 
org.apache.iceberg.TableProperties.DEFAULT_FILE_FORMAT; -import static org.apache.iceberg.TableProperties.DEFAULT_FILE_FORMAT_DEFAULT; -import static org.apache.iceberg.TableProperties.WRITE_TARGET_FILE_SIZE_BYTES; -import static org.apache.iceberg.TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT; - import java.io.IOException; import java.net.URL; import java.nio.file.Files; @@ -33,9 +27,11 @@ import java.util.Map; import java.util.Set; import java.util.UUID; +import java.util.stream.Collectors; import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.FileFormat; import org.apache.iceberg.Table; +import org.apache.iceberg.TableProperties; import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.common.DynClasses; import org.apache.iceberg.common.DynConstructors; @@ -175,12 +171,16 @@ public static TaskWriter createTableWriter( Map tableProps = Maps.newHashMap(table.properties()); tableProps.putAll(config.writeProps()); - String formatStr = tableProps.getOrDefault(DEFAULT_FILE_FORMAT, DEFAULT_FILE_FORMAT_DEFAULT); + String formatStr = + tableProps.getOrDefault( + TableProperties.DEFAULT_FILE_FORMAT, TableProperties.DEFAULT_FILE_FORMAT_DEFAULT); FileFormat format = FileFormat.fromString(formatStr); long targetFileSize = PropertyUtil.propertyAsLong( - tableProps, WRITE_TARGET_FILE_SIZE_BYTES, WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT); + tableProps, + TableProperties.WRITE_TARGET_FILE_SIZE_BYTES, + TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT); Set identifierFieldIds = table.schema().identifierFieldIds(); @@ -197,7 +197,7 @@ public static TaskWriter createTableWriter( } return field.fieldId(); }) - .collect(toSet()); + .collect(Collectors.toSet()); } FileAppenderFactory appenderFactory; @@ -224,8 +224,6 @@ public static TaskWriter createTableWriter( .format(format) .build(); - // FIXME: add delta writers - TaskWriter writer; if (table.spec().isUnpartitioned()) { writer = diff --git 
a/kafka-connect/kafka-connect/src/test/java/org/apache/iceberg/connect/IcebergSinkConnectorTest.java b/kafka-connect/kafka-connect/src/test/java/org/apache/iceberg/connect/IcebergSinkConnectorTest.java index 86502794b224..c8f563a13911 100644 --- a/kafka-connect/kafka-connect/src/test/java/org/apache/iceberg/connect/IcebergSinkConnectorTest.java +++ b/kafka-connect/kafka-connect/src/test/java/org/apache/iceberg/connect/IcebergSinkConnectorTest.java @@ -18,7 +18,6 @@ */ package org.apache.iceberg.connect; -import static org.apache.iceberg.connect.IcebergSinkConfig.INTERNAL_TRANSACTIONAL_SUFFIX_PROP; import static org.assertj.core.api.Assertions.assertThat; import java.util.List; @@ -35,6 +34,7 @@ public void testTaskConfigs() { connector.start(ImmutableMap.of()); List> configs = connector.taskConfigs(3); assertThat(configs).hasSize(3); - configs.forEach(map -> assertThat(map).containsKey(INTERNAL_TRANSACTIONAL_SUFFIX_PROP)); + configs.forEach( + map -> assertThat(map).containsKey(IcebergSinkConfig.INTERNAL_TRANSACTIONAL_SUFFIX_PROP)); } } diff --git a/kafka-connect/kafka-connect/src/test/java/org/apache/iceberg/connect/data/RecordConverterTest.java b/kafka-connect/kafka-connect/src/test/java/org/apache/iceberg/connect/data/RecordConverterTest.java new file mode 100644 index 000000000000..b494a9da85d3 --- /dev/null +++ b/kafka-connect/kafka-connect/src/test/java/org/apache/iceberg/connect/data/RecordConverterTest.java @@ -0,0 +1,936 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.connect.data; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.fasterxml.jackson.databind.ObjectMapper; +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.time.Duration; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.OffsetDateTime; +import java.time.ZoneOffset; +import java.time.temporal.Temporal; +import java.util.Base64; +import java.util.Collection; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.function.Function; +import org.apache.iceberg.Table; +import org.apache.iceberg.TableProperties; +import org.apache.iceberg.connect.IcebergSinkConfig; +import org.apache.iceberg.connect.data.SchemaUpdate.AddColumn; +import org.apache.iceberg.connect.data.SchemaUpdate.UpdateType; +import org.apache.iceberg.data.GenericRecord; +import org.apache.iceberg.data.Record; +import org.apache.iceberg.mapping.MappedField; +import org.apache.iceberg.mapping.NameMapping; +import org.apache.iceberg.mapping.NameMappingParser; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.types.Type; 
+import org.apache.iceberg.types.Types.BinaryType; +import org.apache.iceberg.types.Types.BooleanType; +import org.apache.iceberg.types.Types.DateType; +import org.apache.iceberg.types.Types.DecimalType; +import org.apache.iceberg.types.Types.DoubleType; +import org.apache.iceberg.types.Types.FixedType; +import org.apache.iceberg.types.Types.FloatType; +import org.apache.iceberg.types.Types.IntegerType; +import org.apache.iceberg.types.Types.ListType; +import org.apache.iceberg.types.Types.LongType; +import org.apache.iceberg.types.Types.MapType; +import org.apache.iceberg.types.Types.NestedField; +import org.apache.iceberg.types.Types.StringType; +import org.apache.iceberg.types.Types.StructType; +import org.apache.iceberg.types.Types.TimeType; +import org.apache.iceberg.types.Types.TimestampType; +import org.apache.iceberg.types.Types.UUIDType; +import org.apache.kafka.connect.data.Decimal; +import org.apache.kafka.connect.data.Schema; +import org.apache.kafka.connect.data.SchemaBuilder; +import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.data.Time; +import org.apache.kafka.connect.data.Timestamp; +import org.apache.kafka.connect.json.JsonConverter; +import org.apache.kafka.connect.json.JsonConverterConfig; +import org.apache.kafka.connect.storage.ConverterConfig; +import org.apache.kafka.connect.storage.ConverterType; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +public class RecordConverterTest { + + private static final ObjectMapper MAPPER = new ObjectMapper(); + + private static final org.apache.iceberg.Schema SCHEMA = + new org.apache.iceberg.Schema( + NestedField.required(20, "i", IntegerType.get()), + NestedField.required(21, "l", LongType.get()), + NestedField.required(22, "d", DateType.get()), + NestedField.required(23, "t", TimeType.get()), + 
NestedField.required(24, "ts", TimestampType.withoutZone()), + NestedField.required(25, "tsz", TimestampType.withZone()), + NestedField.required(26, "fl", FloatType.get()), + NestedField.required(27, "do", DoubleType.get()), + NestedField.required(28, "dec", DecimalType.of(9, 2)), + NestedField.required(29, "s", StringType.get()), + NestedField.required(30, "b", BooleanType.get()), + NestedField.required(31, "u", UUIDType.get()), + NestedField.required(32, "f", FixedType.ofLength(3)), + NestedField.required(33, "bi", BinaryType.get()), + NestedField.required(34, "li", ListType.ofRequired(35, StringType.get())), + NestedField.required( + 36, "ma", MapType.ofRequired(37, 38, StringType.get(), StringType.get())), + NestedField.optional(39, "extra", StringType.get())); + + // we have 1 unmapped column so exclude that from the count + private static final int MAPPED_CNT = SCHEMA.columns().size() - 1; + + private static final org.apache.iceberg.Schema NESTED_SCHEMA = + new org.apache.iceberg.Schema( + NestedField.required(1, "ii", IntegerType.get()), + NestedField.required(2, "st", SCHEMA.asStruct())); + + private static final org.apache.iceberg.Schema SIMPLE_SCHEMA = + new org.apache.iceberg.Schema( + NestedField.required(1, "ii", IntegerType.get()), + NestedField.required(2, "st", StringType.get())); + + private static final org.apache.iceberg.Schema ID_SCHEMA = + new org.apache.iceberg.Schema(NestedField.required(1, "ii", IntegerType.get())); + + private static final org.apache.iceberg.Schema STRUCT_IN_LIST_SCHEMA = + new org.apache.iceberg.Schema( + NestedField.required(100, "stli", ListType.ofRequired(101, NESTED_SCHEMA.asStruct()))); + + private static final org.apache.iceberg.Schema STRUCT_IN_LIST_BASIC_SCHEMA = + new org.apache.iceberg.Schema( + NestedField.required(100, "stli", ListType.ofRequired(101, ID_SCHEMA.asStruct()))); + + private static final org.apache.iceberg.Schema STRUCT_IN_MAP_SCHEMA = + new org.apache.iceberg.Schema( + NestedField.required( + 100, 
+ "stma", + MapType.ofRequired(101, 102, StringType.get(), NESTED_SCHEMA.asStruct()))); + + private static final org.apache.iceberg.Schema STRUCT_IN_MAP_BASIC_SCHEMA = + new org.apache.iceberg.Schema( + NestedField.required( + 100, "stma", MapType.ofRequired(101, 102, StringType.get(), ID_SCHEMA.asStruct()))); + + private static final Schema CONNECT_SCHEMA = + SchemaBuilder.struct() + .field("i", Schema.INT32_SCHEMA) + .field("l", Schema.INT64_SCHEMA) + .field("d", org.apache.kafka.connect.data.Date.SCHEMA) + .field("t", Time.SCHEMA) + .field("ts", Timestamp.SCHEMA) + .field("tsz", Timestamp.SCHEMA) + .field("fl", Schema.FLOAT32_SCHEMA) + .field("do", Schema.FLOAT64_SCHEMA) + .field("dec", Decimal.schema(2)) + .field("s", Schema.STRING_SCHEMA) + .field("b", Schema.BOOLEAN_SCHEMA) + .field("u", Schema.STRING_SCHEMA) + .field("f", Schema.BYTES_SCHEMA) + .field("bi", Schema.BYTES_SCHEMA) + .field("li", SchemaBuilder.array(Schema.STRING_SCHEMA)) + .field("ma", SchemaBuilder.map(Schema.STRING_SCHEMA, Schema.STRING_SCHEMA)); + + private static final Schema CONNECT_NESTED_SCHEMA = + SchemaBuilder.struct().field("ii", Schema.INT32_SCHEMA).field("st", CONNECT_SCHEMA); + + private static final Schema CONNECT_STRUCT_IN_LIST_SCHEMA = + SchemaBuilder.struct().field("stli", SchemaBuilder.array(CONNECT_NESTED_SCHEMA)).build(); + + private static final Schema CONNECT_STRUCT_IN_MAP_SCHEMA = + SchemaBuilder.struct() + .field("stma", SchemaBuilder.map(Schema.STRING_SCHEMA, CONNECT_NESTED_SCHEMA)) + .build(); + + private static final LocalDate DATE_VAL = LocalDate.parse("2023-05-18"); + private static final LocalTime TIME_VAL = LocalTime.parse("07:14:21"); + private static final LocalDateTime TS_VAL = LocalDateTime.parse("2023-05-18T07:14:21"); + private static final OffsetDateTime TSZ_VAL = OffsetDateTime.parse("2023-05-18T07:14:21Z"); + private static final BigDecimal DEC_VAL = new BigDecimal("12.34"); + private static final String STR_VAL = "foobar"; + private static final UUID 
UUID_VAL = UUID.randomUUID(); + private static final ByteBuffer BYTES_VAL = ByteBuffer.wrap(new byte[] {1, 2, 3}); + private static final List LIST_VAL = ImmutableList.of("hello", "world"); + private static final Map MAP_VAL = ImmutableMap.of("one", "1", "two", "2"); + + private static final JsonConverter JSON_CONVERTER = new JsonConverter(); + + private IcebergSinkConfig config; + + @BeforeAll + public static void beforeAll() { + JSON_CONVERTER.configure( + ImmutableMap.of( + JsonConverterConfig.SCHEMAS_ENABLE_CONFIG, + false, + ConverterConfig.TYPE_CONFIG, + ConverterType.VALUE.getName())); + } + + @BeforeEach + public void before() { + this.config = mock(IcebergSinkConfig.class); + when(config.jsonConverter()).thenReturn(JSON_CONVERTER); + } + + @Test + public void testMapConvert() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + Map data = createMapData(); + Record record = converter.convert(data); + assertRecordValues(record); + } + + @Test + public void testNestedMapConvert() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(NESTED_SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + Map nestedData = createNestedMapData(); + Record record = converter.convert(nestedData); + assertNestedRecordValues(record); + } + + @Test + @SuppressWarnings("unchecked") + public void testMapToString() throws Exception { + Table table = mock(Table.class); + when(table.schema()).thenReturn(SIMPLE_SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + Map nestedData = createNestedMapData(); + Record record = converter.convert(nestedData); + + String str = (String) record.getField("st"); + Map map = (Map) MAPPER.readValue(str, Map.class); + assertThat(map).hasSize(MAPPED_CNT); + } + + @Test + public void testMapValueInListConvert() { + Table table = mock(Table.class); + 
when(table.schema()).thenReturn(STRUCT_IN_LIST_SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + Map data = createNestedMapData(); + Record record = converter.convert(ImmutableMap.of("stli", ImmutableList.of(data, data))); + List fieldVal = (List) record.getField("stli"); + + Record elementVal = (Record) fieldVal.get(0); + assertNestedRecordValues(elementVal); + } + + @Test + public void testMapValueInMapConvert() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(STRUCT_IN_MAP_SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + Map data = createNestedMapData(); + Record record = + converter.convert(ImmutableMap.of("stma", ImmutableMap.of("key1", data, "key2", data))); + + Map fieldVal = (Map) record.getField("stma"); + Record mapVal = (Record) fieldVal.get("key1"); + assertNestedRecordValues(mapVal); + } + + @Test + public void testStructConvert() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + Struct data = createStructData(); + Record record = converter.convert(data); + assertRecordValues(record); + } + + @Test + public void testNestedStructConvert() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(NESTED_SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + Struct nestedData = createNestedStructData(); + Record record = converter.convert(nestedData); + assertNestedRecordValues(record); + } + + @Test + @SuppressWarnings("unchecked") + public void testStructToString() throws Exception { + Table table = mock(Table.class); + when(table.schema()).thenReturn(SIMPLE_SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + Struct nestedData = createNestedStructData(); + Record record = converter.convert(nestedData); + + String str = (String) record.getField("st"); + Map map = (Map) MAPPER.readValue(str, Map.class); + 
assertThat(map).hasSize(MAPPED_CNT); + } + + @Test + public void testStructValueInListConvert() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(STRUCT_IN_LIST_SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + Struct data = createNestedStructData(); + Struct struct = + new Struct(CONNECT_STRUCT_IN_LIST_SCHEMA).put("stli", ImmutableList.of(data, data)); + Record record = converter.convert(struct); + + List fieldVal = (List) record.getField("stli"); + Record elementVal = (Record) fieldVal.get(0); + assertNestedRecordValues(elementVal); + } + + @Test + public void testStructValueInMapConvert() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(STRUCT_IN_MAP_SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + Struct data = createNestedStructData(); + Struct struct = + new Struct(CONNECT_STRUCT_IN_MAP_SCHEMA) + .put("stma", ImmutableMap.of("key1", data, "key2", data)); + Record record = converter.convert(struct); + + Map fieldVal = (Map) record.getField("stma"); + Record mapVal = (Record) fieldVal.get("key1"); + assertNestedRecordValues(mapVal); + } + + @Test + public void testNameMapping() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(SIMPLE_SCHEMA); + + NameMapping nameMapping = NameMapping.of(MappedField.of(1, ImmutableList.of("renamed_ii"))); + when(table.properties()) + .thenReturn( + ImmutableMap.of( + TableProperties.DEFAULT_NAME_MAPPING, NameMappingParser.toJson(nameMapping))); + + RecordConverter converter = new RecordConverter(table, config); + + Map data = ImmutableMap.of("renamed_ii", 123); + Record record = converter.convert(data); + assertThat(record.getField("ii")).isEqualTo(123); + } + + @ParameterizedTest + @ValueSource(booleans = {false, true}) + public void testCaseSensitivity(boolean caseInsensitive) { + Table table = mock(Table.class); + when(table.schema()).thenReturn(SIMPLE_SCHEMA); + + 
when(config.schemaCaseInsensitive()).thenReturn(caseInsensitive); + + RecordConverter converter = new RecordConverter(table, config); + + Map mapData = ImmutableMap.of("II", 123); + Record record1 = converter.convert(mapData); + + Struct structData = + new Struct(SchemaBuilder.struct().field("II", Schema.INT32_SCHEMA).build()).put("II", 123); + Record record2 = converter.convert(structData); + + if (caseInsensitive) { + assertThat(record1.getField("ii")).isEqualTo(123); + assertThat(record2.getField("ii")).isEqualTo(123); + } else { + assertThat(record1.getField("ii")).isEqualTo(null); + assertThat(record2.getField("ii")).isEqualTo(null); + } + } + + @Test + public void testIntConversion() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(SIMPLE_SCHEMA); + + RecordConverter converter = new RecordConverter(table, config); + + int expectedInt = 123; + + ImmutableList.of("123", 123.0f, 123.0d, 123L, expectedInt) + .forEach( + input -> { + int val = converter.convertInt(input); + assertThat(val).isEqualTo(expectedInt); + }); + } + + @Test + public void testLongConversion() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(SIMPLE_SCHEMA); + + RecordConverter converter = new RecordConverter(table, config); + + long expectedLong = 123L; + + ImmutableList.of("123", 123.0f, 123.0d, 123, expectedLong) + .forEach( + input -> { + long val = converter.convertLong(input); + assertThat(val).isEqualTo(expectedLong); + }); + } + + @Test + public void testFloatConversion() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(SIMPLE_SCHEMA); + + RecordConverter converter = new RecordConverter(table, config); + + float expectedFloat = 123f; + + ImmutableList.of("123", 123, 123L, 123d, expectedFloat) + .forEach( + input -> { + float val = converter.convertFloat(input); + assertThat(val).isEqualTo(expectedFloat); + }); + } + + @Test + public void testDoubleConversion() { + Table table = mock(Table.class); + 
when(table.schema()).thenReturn(SIMPLE_SCHEMA); + + RecordConverter converter = new RecordConverter(table, config); + + double expectedDouble = 123d; + + ImmutableList.of("123", 123, 123L, 123f, expectedDouble) + .forEach( + input -> { + double val = converter.convertDouble(input); + assertThat(val).isEqualTo(expectedDouble); + }); + } + + @Test + public void testDecimalConversion() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(SIMPLE_SCHEMA); + + RecordConverter converter = new RecordConverter(table, config); + + BigDecimal expected = new BigDecimal("123.45"); + + ImmutableList.of("123.45", 123.45d, expected) + .forEach( + input -> { + BigDecimal decimal = converter.convertDecimal(input, DecimalType.of(10, 2)); + assertThat(decimal).isEqualTo(expected); + }); + + BigDecimal expected2 = new BigDecimal(123); + + ImmutableList.of("123", 123, expected2) + .forEach( + input -> { + BigDecimal decimal = converter.convertDecimal(input, DecimalType.of(10, 0)); + assertThat(decimal).isEqualTo(expected2); + }); + } + + @Test + public void testDateConversion() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(SIMPLE_SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + LocalDate expected = LocalDate.of(2023, 11, 15); + + List inputList = + ImmutableList.of( + "2023-11-15", + expected.toEpochDay(), + expected, + new Date(Duration.ofDays(expected.toEpochDay()).toMillis())); + + inputList.forEach( + input -> { + Temporal ts = converter.convertDateValue(input); + assertThat(ts).isEqualTo(expected); + }); + } + + @Test + public void testTimeConversion() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(SIMPLE_SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + LocalTime expected = LocalTime.of(7, 51, 30, 888_000_000); + + List inputList = + ImmutableList.of( + "07:51:30.888", + expected.toNanoOfDay() / 1000 / 1000, + expected, + new Date(expected.toNanoOfDay() / 
1000 / 1000)); + + inputList.forEach( + input -> { + Temporal ts = converter.convertTimeValue(input); + assertThat(ts).isEqualTo(expected); + }); + } + + @Test + public void testTimestampWithZoneConversion() { + OffsetDateTime expected = OffsetDateTime.parse("2023-05-18T11:22:33Z"); + long expectedMillis = expected.toInstant().toEpochMilli(); + assertTimestampConvert(expected, expectedMillis, TimestampType.withZone()); + + // zone should be respected + expected = OffsetDateTime.parse("2023-05-18T03:22:33-08:00"); + List additionalInput = + ImmutableList.of( + "2023-05-18T03:22:33-08", + "2023-05-18 03:22:33-08", + "2023-05-18T03:22:33-08:00", + "2023-05-18 03:22:33-08:00", + "2023-05-18T03:22:33-0800", + "2023-05-18 03:22:33-0800"); + assertTimestampConvert(expected, additionalInput, TimestampType.withZone()); + } + + @Test + public void testTimestampWithoutZoneConversion() { + LocalDateTime expected = LocalDateTime.parse("2023-05-18T11:22:33"); + long expectedMillis = expected.atZone(ZoneOffset.UTC).toInstant().toEpochMilli(); + assertTimestampConvert(expected, expectedMillis, TimestampType.withoutZone()); + + // zone should be ignored + List additionalInput = + ImmutableList.of( + "2023-05-18T11:22:33-08", + "2023-05-18 11:22:33-08", + "2023-05-18T11:22:33-08:00", + "2023-05-18 11:22:33-08:00", + "2023-05-18T11:22:33-0800", + "2023-05-18 11:22:33-0800"); + assertTimestampConvert(expected, additionalInput, TimestampType.withoutZone()); + } + + private void assertTimestampConvert(Temporal expected, long expectedMillis, TimestampType type) { + List inputList = + Lists.newArrayList( + "2023-05-18T11:22:33Z", + "2023-05-18 11:22:33Z", + "2023-05-18T11:22:33+00", + "2023-05-18 11:22:33+00", + "2023-05-18T11:22:33+00:00", + "2023-05-18 11:22:33+00:00", + "2023-05-18T11:22:33+0000", + "2023-05-18 11:22:33+0000", + "2023-05-18T11:22:33", + "2023-05-18 11:22:33", + expectedMillis, + new Date(expectedMillis), + OffsetDateTime.ofInstant(Instant.ofEpochMilli(expectedMillis), 
ZoneOffset.UTC), + LocalDateTime.ofInstant(Instant.ofEpochMilli(expectedMillis), ZoneOffset.UTC)); + + assertTimestampConvert(expected, inputList, type); + } + + private void assertTimestampConvert( + Temporal expected, List inputList, TimestampType type) { + Table table = mock(Table.class); + when(table.schema()).thenReturn(SIMPLE_SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + inputList.forEach( + input -> { + Temporal ts = converter.convertTimestampValue(input, type); + assertThat(ts).isEqualTo(expected); + }); + } + + @Test + public void testMissingColumnDetectionMap() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(ID_SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + Map data = Maps.newHashMap(createMapData()); + data.put("null", null); + + SchemaUpdate.Consumer consumer = new SchemaUpdate.Consumer(); + converter.convert(data, consumer); + Collection addCols = consumer.addColumns(); + + assertThat(addCols).hasSize(MAPPED_CNT); + + Map newColMap = Maps.newHashMap(); + addCols.forEach(addCol -> newColMap.put(addCol.name(), addCol)); + + assertTypesAddedFromMap(col -> newColMap.get(col).type()); + + // null values should be ignored + assertThat(newColMap).doesNotContainKey("null"); + } + + @Test + public void testMissingColumnDetectionMapNested() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(ID_SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + Map nestedData = createNestedMapData(); + SchemaUpdate.Consumer consumer = new SchemaUpdate.Consumer(); + converter.convert(nestedData, consumer); + Collection addCols = consumer.addColumns(); + + assertThat(addCols).hasSize(1); + + AddColumn addCol = addCols.iterator().next(); + assertThat(addCol.name()).isEqualTo("st"); + + StructType addedType = addCol.type().asStructType(); + assertThat(addedType.fields()).hasSize(MAPPED_CNT); + assertTypesAddedFromMap(col -> 
addedType.field(col).type()); + } + + @Test + public void testMissingColumnDetectionMapListValue() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(STRUCT_IN_LIST_BASIC_SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + Map nestedData = createNestedMapData(); + Map map = ImmutableMap.of("stli", ImmutableList.of(nestedData, nestedData)); + SchemaUpdate.Consumer consumer = new SchemaUpdate.Consumer(); + converter.convert(map, consumer); + Collection addCols = consumer.addColumns(); + + assertThat(addCols).hasSize(1); + + AddColumn addCol = addCols.iterator().next(); + assertThat(addCol.parentName()).isEqualTo("stli.element"); + assertThat(addCol.name()).isEqualTo("st"); + + StructType nestedElementType = addCol.type().asStructType(); + assertThat(nestedElementType.fields()).hasSize(MAPPED_CNT); + assertTypesAddedFromMap(col -> nestedElementType.field(col).type()); + } + + private void assertTypesAddedFromMap(Function fn) { + assertThat(fn.apply("i")).isInstanceOf(LongType.class); + assertThat(fn.apply("l")).isInstanceOf(LongType.class); + assertThat(fn.apply("d")).isInstanceOf(StringType.class); + assertThat(fn.apply("t")).isInstanceOf(StringType.class); + assertThat(fn.apply("ts")).isInstanceOf(StringType.class); + assertThat(fn.apply("tsz")).isInstanceOf(StringType.class); + assertThat(fn.apply("fl")).isInstanceOf(DoubleType.class); + assertThat(fn.apply("do")).isInstanceOf(DoubleType.class); + assertThat(fn.apply("dec")).isInstanceOf(StringType.class); + assertThat(fn.apply("s")).isInstanceOf(StringType.class); + assertThat(fn.apply("b")).isInstanceOf(BooleanType.class); + assertThat(fn.apply("u")).isInstanceOf(StringType.class); + assertThat(fn.apply("f")).isInstanceOf(StringType.class); + assertThat(fn.apply("bi")).isInstanceOf(StringType.class); + assertThat(fn.apply("li")).isInstanceOf(ListType.class); + assertThat(fn.apply("ma")).isInstanceOf(StructType.class); + } + + @Test + public void 
testMissingColumnDetectionStruct() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(ID_SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + Struct data = createStructData(); + SchemaUpdate.Consumer consumer = new SchemaUpdate.Consumer(); + converter.convert(data, consumer); + Collection addCols = consumer.addColumns(); + + assertThat(addCols).hasSize(MAPPED_CNT); + + Map newColMap = Maps.newHashMap(); + addCols.forEach(addCol -> newColMap.put(addCol.name(), addCol)); + + assertTypesAddedFromStruct(col -> newColMap.get(col).type()); + } + + @Test + public void testMissingColumnDetectionStructNested() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(ID_SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + Struct nestedData = createNestedStructData(); + SchemaUpdate.Consumer consumer = new SchemaUpdate.Consumer(); + converter.convert(nestedData, consumer); + Collection addCols = consumer.addColumns(); + + assertThat(addCols).hasSize(1); + + AddColumn addCol = addCols.iterator().next(); + assertThat(addCol.name()).isEqualTo("st"); + + StructType addedType = addCol.type().asStructType(); + assertThat(addedType.fields()).hasSize(MAPPED_CNT); + assertTypesAddedFromStruct(col -> addedType.field(col).type()); + } + + @Test + public void testMissingColumnDetectionStructListValue() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(STRUCT_IN_LIST_BASIC_SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + Struct nestedData = createNestedStructData(); + Struct struct = + new Struct(CONNECT_STRUCT_IN_LIST_SCHEMA) + .put("stli", ImmutableList.of(nestedData, nestedData)); + SchemaUpdate.Consumer consumer = new SchemaUpdate.Consumer(); + converter.convert(struct, consumer); + Collection addCols = consumer.addColumns(); + + assertThat(addCols).hasSize(1); + + AddColumn addCol = addCols.iterator().next(); + 
assertThat(addCol.parentName()).isEqualTo("stli.element"); + assertThat(addCol.name()).isEqualTo("st"); + + StructType nestedElementType = addCol.type().asStructType(); + assertThat(nestedElementType.fields()).hasSize(MAPPED_CNT); + assertTypesAddedFromStruct(col -> nestedElementType.field(col).type()); + } + + @Test + public void testMissingColumnDetectionStructMapValue() { + Table table = mock(Table.class); + when(table.schema()).thenReturn(STRUCT_IN_MAP_BASIC_SCHEMA); + RecordConverter converter = new RecordConverter(table, config); + + Struct nestedData = createNestedStructData(); + Struct struct = + new Struct(CONNECT_STRUCT_IN_MAP_SCHEMA) + .put("stma", ImmutableMap.of("key1", nestedData, "key2", nestedData)); + SchemaUpdate.Consumer consumer = new SchemaUpdate.Consumer(); + converter.convert(struct, consumer); + Collection addCols = consumer.addColumns(); + + assertThat(addCols).hasSize(1); + + AddColumn addCol = addCols.iterator().next(); + assertThat(addCol.parentName()).isEqualTo("stma.value"); + assertThat(addCol.name()).isEqualTo("st"); + + StructType nestedValueType = addCol.type().asStructType(); + assertThat(nestedValueType.fields()).hasSize(MAPPED_CNT); + assertTypesAddedFromStruct(col -> nestedValueType.field(col).type()); + } + + private void assertTypesAddedFromStruct(Function fn) { + assertThat(fn.apply("i")).isInstanceOf(IntegerType.class); + assertThat(fn.apply("l")).isInstanceOf(LongType.class); + assertThat(fn.apply("d")).isInstanceOf(DateType.class); + assertThat(fn.apply("t")).isInstanceOf(TimeType.class); + assertThat(fn.apply("ts")).isInstanceOf(TimestampType.class); + assertThat(fn.apply("tsz")).isInstanceOf(TimestampType.class); + assertThat(fn.apply("fl")).isInstanceOf(FloatType.class); + assertThat(fn.apply("do")).isInstanceOf(DoubleType.class); + assertThat(fn.apply("dec")).isInstanceOf(DecimalType.class); + assertThat(fn.apply("s")).isInstanceOf(StringType.class); + assertThat(fn.apply("b")).isInstanceOf(BooleanType.class); + 
assertThat(fn.apply("u")).isInstanceOf(StringType.class); + assertThat(fn.apply("f")).isInstanceOf(BinaryType.class); + assertThat(fn.apply("bi")).isInstanceOf(BinaryType.class); + assertThat(fn.apply("li")).isInstanceOf(ListType.class); + assertThat(fn.apply("ma")).isInstanceOf(MapType.class); + } + + @Test + public void testEvolveTypeDetectionStruct() { + org.apache.iceberg.Schema tableSchema = + new org.apache.iceberg.Schema( + NestedField.required(1, "ii", IntegerType.get()), + NestedField.required(2, "ff", FloatType.get())); + + Table table = mock(Table.class); + when(table.schema()).thenReturn(tableSchema); + RecordConverter converter = new RecordConverter(table, config); + + Schema valueSchema = + SchemaBuilder.struct().field("ii", Schema.INT64_SCHEMA).field("ff", Schema.FLOAT64_SCHEMA); + Struct data = new Struct(valueSchema).put("ii", 11L).put("ff", 22d); + + SchemaUpdate.Consumer consumer = new SchemaUpdate.Consumer(); + converter.convert(data, consumer); + Collection updates = consumer.updateTypes(); + + assertThat(updates).hasSize(2); + + Map updateMap = Maps.newHashMap(); + updates.forEach(update -> updateMap.put(update.name(), update)); + + assertThat(updateMap.get("ii").type()).isInstanceOf(LongType.class); + assertThat(updateMap.get("ff").type()).isInstanceOf(DoubleType.class); + } + + @Test + public void testEvolveTypeDetectionStructNested() { + org.apache.iceberg.Schema structColSchema = + new org.apache.iceberg.Schema( + NestedField.required(1, "ii", IntegerType.get()), + NestedField.required(2, "ff", FloatType.get())); + + org.apache.iceberg.Schema tableSchema = + new org.apache.iceberg.Schema( + NestedField.required(3, "i", IntegerType.get()), + NestedField.required(4, "st", structColSchema.asStruct())); + + Table table = mock(Table.class); + when(table.schema()).thenReturn(tableSchema); + RecordConverter converter = new RecordConverter(table, config); + + Schema structSchema = + SchemaBuilder.struct().field("ii", 
Schema.INT64_SCHEMA).field("ff", Schema.FLOAT64_SCHEMA); + Schema schema = + SchemaBuilder.struct().field("i", Schema.INT32_SCHEMA).field("st", structSchema); + Struct structValue = new Struct(structSchema).put("ii", 11L).put("ff", 22d); + Struct data = new Struct(schema).put("i", 1).put("st", structValue); + + SchemaUpdate.Consumer consumer = new SchemaUpdate.Consumer(); + converter.convert(data, consumer); + Collection updates = consumer.updateTypes(); + + assertThat(updates).hasSize(2); + + Map updateMap = Maps.newHashMap(); + updates.forEach(update -> updateMap.put(update.name(), update)); + + assertThat(updateMap.get("st.ii").type()).isInstanceOf(LongType.class); + assertThat(updateMap.get("st.ff").type()).isInstanceOf(DoubleType.class); + } + + private Map createMapData() { + return ImmutableMap.builder() + .put("i", 1) + .put("l", 2L) + .put("d", DATE_VAL.toString()) + .put("t", TIME_VAL.toString()) + .put("ts", TS_VAL.toString()) + .put("tsz", TSZ_VAL.toString()) + .put("fl", 1.1f) + .put("do", 2.2d) + .put("dec", DEC_VAL.toString()) + .put("s", STR_VAL) + .put("b", true) + .put("u", UUID_VAL.toString()) + .put("f", Base64.getEncoder().encodeToString(BYTES_VAL.array())) + .put("bi", Base64.getEncoder().encodeToString(BYTES_VAL.array())) + .put("li", LIST_VAL) + .put("ma", MAP_VAL) + .build(); + } + + private Map createNestedMapData() { + return ImmutableMap.builder().put("ii", 11).put("st", createMapData()).build(); + } + + private Struct createStructData() { + return new Struct(CONNECT_SCHEMA) + .put("i", 1) + .put("l", 2L) + .put("d", new Date(DATE_VAL.toEpochDay() * 24 * 60 * 60 * 1000L)) + .put("t", new Date(TIME_VAL.toNanoOfDay() / 1_000_000)) + .put("ts", Date.from(TS_VAL.atZone(ZoneOffset.UTC).toInstant())) + .put("tsz", Date.from(TSZ_VAL.toInstant())) + .put("fl", 1.1f) + .put("do", 2.2d) + .put("dec", DEC_VAL) + .put("s", STR_VAL) + .put("b", true) + .put("u", UUID_VAL.toString()) + .put("f", BYTES_VAL.array()) + .put("bi", BYTES_VAL.array()) + 
.put("li", LIST_VAL) + .put("ma", MAP_VAL); + } + + private Struct createNestedStructData() { + return new Struct(CONNECT_NESTED_SCHEMA).put("ii", 11).put("st", createStructData()); + } + + private void assertRecordValues(Record record) { + GenericRecord rec = (GenericRecord) record; + assertThat(rec.getField("i")).isEqualTo(1); + assertThat(rec.getField("l")).isEqualTo(2L); + assertThat(rec.getField("d")).isEqualTo(DATE_VAL); + assertThat(rec.getField("t")).isEqualTo(TIME_VAL); + assertThat(rec.getField("ts")).isEqualTo(TS_VAL); + assertThat(rec.getField("tsz")).isEqualTo(TSZ_VAL); + assertThat(rec.getField("fl")).isEqualTo(1.1f); + assertThat(rec.getField("do")).isEqualTo(2.2d); + assertThat(rec.getField("dec")).isEqualTo(DEC_VAL); + assertThat(rec.getField("s")).isEqualTo(STR_VAL); + assertThat(rec.getField("b")).isEqualTo(true); + assertThat(rec.getField("u")).isEqualTo(UUID_VAL); + assertThat(rec.getField("f")).isEqualTo(BYTES_VAL); + assertThat(rec.getField("bi")).isEqualTo(BYTES_VAL); + assertThat(rec.getField("li")).isEqualTo(LIST_VAL); + assertThat(rec.getField("ma")).isEqualTo(MAP_VAL); + } + + private void assertNestedRecordValues(Record record) { + GenericRecord rec = (GenericRecord) record; + assertThat(rec.getField("ii")).isEqualTo(11); + assertRecordValues((GenericRecord) rec.getField("st")); + } +} From 9cbc2f43c4a7e3feb15703dd4a0dd0f4423f2ced Mon Sep 17 00:00:00 2001 From: Eduard Tudenhoefner Date: Fri, 22 Mar 2024 08:19:29 +0100 Subject: [PATCH 22/23] Core: Use <?> as type parameter instead of raw type for SnapshotUpdate (#10015) --- .../test/java/org/apache/iceberg/TableTestBase.java | 10 +++++----- core/src/test/java/org/apache/iceberg/TestBase.java | 10 +++++----- .../src/test/java/org/apache/iceberg/TestRowDelta.java | 8 ++++---- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/TableTestBase.java b/core/src/test/java/org/apache/iceberg/TableTestBase.java index c3db85910138..a92497f88e87 100644
--- a/core/src/test/java/org/apache/iceberg/TableTestBase.java +++ b/core/src/test/java/org/apache/iceberg/TableTestBase.java @@ -375,24 +375,24 @@ void validateSnapshot(Snapshot old, Snapshot snap, long sequenceNumber, DataFile } @SuppressWarnings("checkstyle:HiddenField") - Snapshot commit(Table table, SnapshotUpdate snapshotUpdate, String branch) { + Snapshot commit(Table table, SnapshotUpdate snapshotUpdate, String branch) { Snapshot snapshot; if (branch.equals(SnapshotRef.MAIN_BRANCH)) { snapshotUpdate.commit(); snapshot = table.currentSnapshot(); } else { - ((SnapshotProducer) snapshotUpdate.toBranch(branch)).commit(); + ((SnapshotProducer) snapshotUpdate.toBranch(branch)).commit(); snapshot = table.snapshot(branch); } return snapshot; } - Snapshot apply(SnapshotUpdate snapshotUpdate, String branch) { + Snapshot apply(SnapshotUpdate snapshotUpdate, String branch) { if (branch.equals(SnapshotRef.MAIN_BRANCH)) { - return ((SnapshotProducer) snapshotUpdate).apply(); + return ((SnapshotProducer) snapshotUpdate).apply(); } else { - return ((SnapshotProducer) snapshotUpdate.toBranch(branch)).apply(); + return ((SnapshotProducer) snapshotUpdate.toBranch(branch)).apply(); } } diff --git a/core/src/test/java/org/apache/iceberg/TestBase.java b/core/src/test/java/org/apache/iceberg/TestBase.java index ebd527ded306..10aa57abf6f3 100644 --- a/core/src/test/java/org/apache/iceberg/TestBase.java +++ b/core/src/test/java/org/apache/iceberg/TestBase.java @@ -366,24 +366,24 @@ void validateSnapshot(Snapshot old, Snapshot snap, long sequenceNumber, DataFile } @SuppressWarnings("checkstyle:HiddenField") - Snapshot commit(Table table, SnapshotUpdate snapshotUpdate, String branch) { + Snapshot commit(Table table, SnapshotUpdate snapshotUpdate, String branch) { Snapshot snapshot; if (branch.equals(SnapshotRef.MAIN_BRANCH)) { snapshotUpdate.commit(); snapshot = table.currentSnapshot(); } else { - ((SnapshotProducer) snapshotUpdate.toBranch(branch)).commit(); + ((SnapshotProducer) 
snapshotUpdate.toBranch(branch)).commit(); snapshot = table.snapshot(branch); } return snapshot; } - Snapshot apply(SnapshotUpdate snapshotUpdate, String branch) { + Snapshot apply(SnapshotUpdate snapshotUpdate, String branch) { if (branch.equals(SnapshotRef.MAIN_BRANCH)) { - return ((SnapshotProducer) snapshotUpdate).apply(); + return ((SnapshotProducer) snapshotUpdate).apply(); } else { - return ((SnapshotProducer) snapshotUpdate.toBranch(branch)).apply(); + return ((SnapshotProducer) snapshotUpdate.toBranch(branch)).apply(); } } diff --git a/core/src/test/java/org/apache/iceberg/TestRowDelta.java b/core/src/test/java/org/apache/iceberg/TestRowDelta.java index fa04f36d367f..56bab52edaf9 100644 --- a/core/src/test/java/org/apache/iceberg/TestRowDelta.java +++ b/core/src/test/java/org/apache/iceberg/TestRowDelta.java @@ -62,7 +62,7 @@ public TestRowDelta(String branch) { @Test public void testAddDeleteFile() { - SnapshotUpdate rowDelta = + SnapshotUpdate rowDelta = table.newRowDelta().addRows(FILE_A).addDeletes(FILE_A_DELETES).addDeletes(FILE_B_DELETES); commit(table, rowDelta, branch); @@ -97,7 +97,7 @@ public void testAddDeleteFile() { @Test public void testValidateDataFilesExistDefaults() { - SnapshotUpdate rowDelta1 = table.newAppend().appendFile(FILE_A).appendFile(FILE_B); + SnapshotUpdate rowDelta1 = table.newAppend().appendFile(FILE_A).appendFile(FILE_B); commit(table, rowDelta1, branch); @@ -105,12 +105,12 @@ public void testValidateDataFilesExistDefaults() { long validateFromSnapshotId = latestSnapshot(table, branch).snapshotId(); // overwrite FILE_A - SnapshotUpdate rowDelta2 = table.newOverwrite().deleteFile(FILE_A).addFile(FILE_A2); + SnapshotUpdate rowDelta2 = table.newOverwrite().deleteFile(FILE_A).addFile(FILE_A2); commit(table, rowDelta2, branch); // delete FILE_B - SnapshotUpdate rowDelta3 = table.newDelete().deleteFile(FILE_B); + SnapshotUpdate rowDelta3 = table.newDelete().deleteFile(FILE_B); commit(table, rowDelta3, branch); From 
c9795fda7105789edc0d1f8a624ceb251dfd4a69 Mon Sep 17 00:00:00 2001 From: "Brian \"bits\" Olsen" Date: Fri, 22 Mar 2024 02:23:36 -0500 Subject: [PATCH 23/23] Docs: Add local nightly build to test current docs changes (#9943) --- site/README.md | 13 ++++++++++++- site/dev/common.sh | 26 +++++++++++++++++++++++++- site/nav.yml | 1 + 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/site/README.md b/site/README.md index 63394635b463..6cb4f4907ebf 100644 --- a/site/README.md +++ b/site/README.md @@ -60,7 +60,12 @@ The static Iceberg site pages are Markdown files that live at `/site/docs/*.md`. ``` ### Building the versioned docs -The Iceberg versioned docs are committed in the [orphan `docs` branch](https://github.com/apache/iceberg/tree/docs) and mounted using [git worktree](https://git-scm.com/docs/git-worktree) at build time. The `docs` branch contains the versioned documenation source files at the root. These versions are mounted at the `/site/docs/docs/` directory at build time. The `latest` version, is a soft link to the most recent [semver version](https://semver.org/) in the `docs` branch. There is also an [orphan `javadoc` branch](https://github.com/apache/iceberg/tree/javadoc) that contains prior staticly generated versions of the javadocs mounted at `/site/docs/javadoc/` during build time. +The Iceberg versioned docs are committed in two [orphan](https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddeforphanaorphan) branches and mounted using [git worktree](https://git-scm.com/docs/git-worktree) at build time: + + 1. [`docs`](https://github.com/apache/iceberg/tree/docs) - contains the state of the documentation source files (`/docs`) during release. These versions are mounted at the `/site/docs/docs/` directory at build time. + 1. [`javadoc`](https://github.com/apache/iceberg/tree/javadoc) - contains prior statically generated versions of the javadocs mounted at the `/site/docs/javadoc/` directory at build time. 
+ +The `latest` version, is a soft link to the most recent [semver version](https://semver.org/) in the `docs` branch. The `nightly` version, is a soft link to the current local state of the `/docs` markdown files. The docs are built, run, and released using [make](https://www.gnu.org/software/make/manual/make.html). The [Makefile](Makefile) and the [common shell script](dev/common.sh) support the following command: @@ -84,11 +89,13 @@ This step will generate the staged source code which blends into the original so ./site/ └── docs    ├── docs +    │ ├── nightly (symlink to /docs/)    │ ├── latest (symlink to /site/docs/1.4.0/)    │ ├── 1.4.0    │ ├── 1.3.1    │   └── ... ├── javadoc +    │ ├── nightly (currently points to latest)    │ ├── latest    │ ├── 1.4.0    │ ├── 1.3.1 @@ -143,6 +150,9 @@ As mentioned in the MkDocs section, when you build MkDocs `mkdocs build`, MkDocs ./site/ ├── docs │   ├── docs +│   │   ├── nightly +│   │   │ ├── docs +│   │   │ └── mkdocs.yml │   │   ├── latest │   │   │ ├── docs │   │   │ └── mkdocs.yml @@ -150,6 +160,7 @@ As mentioned in the MkDocs section, when you build MkDocs `mkdocs build`, MkDocs │   │   ├── docs │   │ └── mkdocs.yml │   └─ javadoc +│   ├── nightly │   ├── latest │   └── 1.4.0 └── mkdocs.yml diff --git a/site/dev/common.sh b/site/dev/common.sh index 6eb0cf34a9c0..481628aa4dbd 100755 --- a/site/dev/common.sh +++ b/site/dev/common.sh @@ -85,6 +85,26 @@ assert_not_empty () { fi } +# Creates a 'nightly' version of the documentation that points to the current versioned docs +# located at the root-level `/docs` directory. 
+create_nightly () { + echo " --> create nightly" + + # Remove any existing 'nightly' directory and recreate it + rm -rf docs/docs/nightly/ + mkdir docs/docs/nightly/ + + # Create symbolic links and copy configuration files for the 'nightly' documentation + ln -s "../../../../docs/docs/" docs/docs/nightly/docs + cp "../docs/mkdocs.yml" docs/docs/nightly/ + + cd docs/docs/ + + # Update version information within the 'nightly' documentation + update_version "nightly" + cd - +} + # Finds and retrieves the latest version of the documentation based on the directory structure. # Assumes the documentation versions are numeric folders within 'docs/docs/'. get_latest_version () { @@ -189,6 +209,9 @@ pull_versioned_docs () { # Create the 'latest' version of documentation create_latest "${latest_version}" + + # Create the 'nightly' version of documentation + create_nightly } # Cleans up artifacts and temporary files generated during documentation management. @@ -198,8 +221,9 @@ clean () { # Temporarily disable script exit on errors to ensure cleanup continues set +e - # Remove 'latest' directories and related Git worktrees + # Remove temp directories and related Git worktrees rm -rf docs/docs/latest &> /dev/null + rm -rf docs/docs/nightly &> /dev/null git worktree remove docs/docs &> /dev/null git worktree remove docs/javadoc &> /dev/null diff --git a/site/nav.yml b/site/nav.yml index aa5c77b46122..4a3548433636 100644 --- a/site/nav.yml +++ b/site/nav.yml @@ -21,6 +21,7 @@ nav: - Spark: spark-quickstart.md - Hive: hive-quickstart.md - Docs: + - nightly: '!include docs/docs/nightly/mkdocs.yml' - latest: '!include docs/docs/latest/mkdocs.yml' - 1.5.0: '!include docs/docs/1.5.0/mkdocs.yml' - 1.4.3: '!include docs/docs/1.4.3/mkdocs.yml'