From d18061f815983e605937825faa76df34f459953e Mon Sep 17 00:00:00 2001 From: openinx Date: Mon, 8 Jun 2020 21:34:16 +0800 Subject: [PATCH 1/3] Reuse the common methods in several RandomData classes. --- .../org/apache/iceberg/util/RandomUtil.java | 184 ++++++++++++++++++ .../apache/iceberg/avro/RandomAvroData.java | 158 +-------------- .../iceberg/data/RandomGenericData.java | 170 +--------------- .../apache/iceberg/spark/data/RandomData.java | 165 ++-------------- 4 files changed, 208 insertions(+), 469 deletions(-) create mode 100644 core/src/main/java/org/apache/iceberg/util/RandomUtil.java diff --git a/core/src/main/java/org/apache/iceberg/util/RandomUtil.java b/core/src/main/java/org/apache/iceberg/util/RandomUtil.java new file mode 100644 index 000000000000..3cf5039a5445 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/util/RandomUtil.java @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.util; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.util.Random; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; + +public class RandomUtil { + + private RandomUtil() { + + } + + @SuppressWarnings("RandomModInteger") + public static Object generatePrimitive(Type.PrimitiveType primitive, + Random random) { + int choice = random.nextInt(20); + + switch (primitive.typeId()) { + case BOOLEAN: + return choice < 10; + + case INTEGER: + switch (choice) { + case 1: + return Integer.MIN_VALUE; + case 2: + return Integer.MAX_VALUE; + case 3: + return 0; + default: + return random.nextInt(); + } + + case LONG: + switch (choice) { + case 1: + return Long.MIN_VALUE; + case 2: + return Long.MAX_VALUE; + case 3: + return 0L; + default: + return random.nextLong(); + } + + case FLOAT: + switch (choice) { + case 1: + return Float.MIN_VALUE; + case 2: + return -Float.MIN_VALUE; + case 3: + return Float.MAX_VALUE; + case 4: + return -Float.MAX_VALUE; + case 5: + return Float.NEGATIVE_INFINITY; + case 6: + return Float.POSITIVE_INFINITY; + case 7: + return 0.0F; + case 8: + return Float.NaN; + default: + return random.nextFloat(); + } + + case DOUBLE: + switch (choice) { + case 1: + return Double.MIN_VALUE; + case 2: + return -Double.MIN_VALUE; + case 3: + return Double.MAX_VALUE; + case 4: + return -Double.MAX_VALUE; + case 5: + return Double.NEGATIVE_INFINITY; + case 6: + return Double.POSITIVE_INFINITY; + case 7: + return 0.0D; + case 8: + return Double.NaN; + default: + return random.nextDouble(); + } + + case DATE: + // this will include negative values (dates before 1970-01-01) + return random.nextInt() % ABOUT_380_YEARS_IN_DAYS; + + case TIME: + return (random.nextLong() & Integer.MAX_VALUE) % ONE_DAY_IN_MICROS; + + case TIMESTAMP: + return random.nextLong() % FIFTY_YEARS_IN_MICROS; + + case STRING: + return randomString(random); + + case UUID: + byte[] uuidBytes = new byte[16]; + 
random.nextBytes(uuidBytes); + // raw bytes only; callers hash them into a UUID via UUID.nameUUIDFromBytes + return uuidBytes; + + case FIXED: + byte[] fixed = new byte[((Types.FixedType) primitive).length()]; + random.nextBytes(fixed); + return fixed; + + case BINARY: + byte[] binary = new byte[random.nextInt(50)]; + random.nextBytes(binary); + return binary; + + case DECIMAL: + Types.DecimalType type = (Types.DecimalType) primitive; + BigInteger unscaled = randomUnscaled(type.precision(), random); + return new BigDecimal(unscaled, type.scale()); + + default: + throw new IllegalArgumentException( + "Cannot generate random value for unknown type: " + primitive); + } + } + + private static final long FIFTY_YEARS_IN_MICROS = + (50L * (365 * 3 + 366) * 24 * 60 * 60 * 1_000_000) / 4; + private static final int ABOUT_380_YEARS_IN_DAYS = 380 * 365; + private static final long ONE_DAY_IN_MICROS = 24 * 60 * 60 * 1_000_000L; + private static final String CHARS = + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-.!?"; + + private static String randomString(Random random) { + int length = random.nextInt(50); + byte[] buffer = new byte[length]; + + for (int i = 0; i < length; i += 1) { + buffer[i] = (byte) CHARS.charAt(random.nextInt(CHARS.length())); + } + + return new String(buffer); + } + + private static final String DIGITS = "0123456789"; + + private static BigInteger randomUnscaled(int precision, Random random) { + int length = random.nextInt(precision); + if (length == 0) { + return BigInteger.ZERO; + } + + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < length; i += 1) { + sb.append(DIGITS.charAt(random.nextInt(DIGITS.length()))); + } + + return new BigInteger(sb.toString()); + } +} diff --git a/core/src/test/java/org/apache/iceberg/avro/RandomAvroData.java b/core/src/test/java/org/apache/iceberg/avro/RandomAvroData.java index d36731d9d41b..1c3d9adf26dc 100644 --- a/core/src/test/java/org/apache/iceberg/avro/RandomAvroData.java +++ 
b/core/src/test/java/org/apache/iceberg/avro/RandomAvroData.java @@ -19,8 +19,6 @@ package org.apache.iceberg.avro; -import java.math.BigDecimal; -import java.math.BigInteger; import java.nio.ByteBuffer; import java.util.List; import java.util.Map; @@ -38,6 +36,7 @@ import org.apache.iceberg.types.Type; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.RandomUtil; public class RandomAvroData { @@ -140,10 +139,12 @@ public Object map(Types.MapType map, Supplier keyResult, Supplier keyResult, Supplier keyResult, Supplier keyResult, Supplier Date: Mon, 8 Jun 2020 22:10:03 +0800 Subject: [PATCH 2/3] Fix the failed unit test --- .../org/apache/iceberg/data/RandomGenericData.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/data/src/test/java/org/apache/iceberg/data/RandomGenericData.java b/data/src/test/java/org/apache/iceberg/data/RandomGenericData.java index 52b365530001..e97656ac8fc0 100644 --- a/data/src/test/java/org/apache/iceberg/data/RandomGenericData.java +++ b/data/src/test/java/org/apache/iceberg/data/RandomGenericData.java @@ -21,6 +21,8 @@ import java.nio.ByteBuffer; import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalTime; import java.time.OffsetDateTime; import java.time.ZoneOffset; import java.util.List; @@ -144,12 +146,16 @@ public Object primitive(Type.PrimitiveType primitive) { return ByteBuffer.wrap((byte[]) result); case UUID: return UUID.nameUUIDFromBytes((byte[]) result); + case DATE: + return EPOCH_DAY.plusDays((Integer) result); + case TIME: + return LocalTime.ofNanoOfDay((long) result * 1000); case TIMESTAMP: Types.TimestampType ts = (Types.TimestampType) primitive; if (ts.shouldAdjustToUTC()) { - return EPOCH.plus(random.nextLong() % FIFTY_YEARS_IN_MICROS, MICROS); + return EPOCH.plus((long) result, MICROS); } else { - return EPOCH.plus(random.nextLong() % FIFTY_YEARS_IN_MICROS, MICROS).toLocalDateTime(); + return 
EPOCH.plus((long) result, MICROS).toLocalDateTime(); } default: return result; @@ -158,6 +164,5 @@ public Object primitive(Type.PrimitiveType primitive) { } private static final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC); - private static final long FIFTY_YEARS_IN_MICROS = - (50L * (365 * 3 + 366) * 24 * 60 * 60 * 1_000_000) / 4; + private static final LocalDate EPOCH_DAY = EPOCH.toLocalDate(); } From 22599fd4bc575baca80680f6eb2e1a137c1b0f63 Mon Sep 17 00:00:00 2001 From: openinx Date: Wed, 10 Jun 2020 09:57:11 +0800 Subject: [PATCH 3/3] Fix the failing unit test and move the utility to src/test --- .../src/test}/java/org/apache/iceberg/util/RandomUtil.java | 1 - .../test/java/org/apache/iceberg/spark/data/RandomData.java | 4 ---- 2 files changed, 5 deletions(-) rename {core/src/main => api/src/test}/java/org/apache/iceberg/util/RandomUtil.java (99%) diff --git a/core/src/main/java/org/apache/iceberg/util/RandomUtil.java b/api/src/test/java/org/apache/iceberg/util/RandomUtil.java similarity index 99% rename from core/src/main/java/org/apache/iceberg/util/RandomUtil.java rename to api/src/test/java/org/apache/iceberg/util/RandomUtil.java index 3cf5039a5445..d621c6bdb0b4 100644 --- a/core/src/main/java/org/apache/iceberg/util/RandomUtil.java +++ b/api/src/test/java/org/apache/iceberg/util/RandomUtil.java @@ -28,7 +28,6 @@ public class RandomUtil { private RandomUtil() { - } @SuppressWarnings("RandomModInteger") diff --git a/spark/src/test/java/org/apache/iceberg/spark/data/RandomData.java b/spark/src/test/java/org/apache/iceberg/spark/data/RandomData.java index 8d0f4592b68c..b5f0b7153b7a 100644 --- a/spark/src/test/java/org/apache/iceberg/spark/data/RandomData.java +++ b/spark/src/test/java/org/apache/iceberg/spark/data/RandomData.java @@ -187,8 +187,6 @@ public Object primitive(Type.PrimitiveType primitive) { // For the primitives that Avro needs a different type than Spark, fix // them here. 
switch (primitive.typeId()) { - case STRING: - return ((UTF8String) result).toString(); case FIXED: return new GenericData.Fixed(typeToSchema.get(primitive), (byte[]) result); @@ -196,8 +194,6 @@ public Object primitive(Type.PrimitiveType primitive) { return ByteBuffer.wrap((byte[]) result); case UUID: return UUID.nameUUIDFromBytes((byte[]) result); - case DECIMAL: - return ((Decimal) result).toJavaBigDecimal(); default: return result; }