From d6563c801613daf015e5f54f27ead56947ab7ed3 Mon Sep 17 00:00:00 2001
From: Yueyang Qiu
Date: Wed, 12 Aug 2020 10:52:04 -0700
Subject: [PATCH 1/2] Follow the same way that BigQuery handles unspecified or duplicate ZetaSQL STRUCT field name

---
 .../sql/zetasql/ZetaSqlCalciteTranslationUtils.java | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlCalciteTranslationUtils.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlCalciteTranslationUtils.java
index 620342ee5ee5..0eaadcac9d8b 100644
--- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlCalciteTranslationUtils.java
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlCalciteTranslationUtils.java
@@ -27,7 +27,9 @@
 import java.math.BigDecimal;
 import java.time.LocalDateTime;
 import java.time.LocalTime;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 import java.util.stream.Collectors;
 import org.apache.beam.sdk.annotations.Internal;
 import org.apache.beam.sdk.extensions.sql.meta.provider.bigquery.BeamBigQuerySqlDialect;
@@ -180,12 +182,19 @@ private static RelDataType toCalciteStructType(
 
   private static List getFieldNameList(List fields) {
     ImmutableList.Builder b = ImmutableList.builder();
+    Set usedName = new HashSet<>();
     for (int i = 0; i < fields.size(); i++) {
       String name = fields.get(i).getName();
-      if ("".equals(name)) {
-        name = "$col" + i; // avoid empty field names because Beam does not allow duplicate names
+      // Follow the same way that BigQuery handles unspecified or duplicate field name
+      if ("".equals(name) || usedName.contains(name)) {
+        name = "_field_" + (i + 1); // BigQuery uses 1-based default field name
+        if (usedName.contains(name)) {
+          throw new UnsupportedOperationException(
+              name + " is not a valid field name. Please use another name for the field.");
+        }
       }
       b.add(name);
+      usedName.add(name);
     }
     return b.build();
   }

From 9dfd39ff1a100ba2b0b43a643b2529e6c0871b79 Mon Sep 17 00:00:00 2001
From: Yueyang Qiu
Date: Wed, 12 Aug 2020 11:47:10 -0700
Subject: [PATCH 2/2] Handle field names starting with _field_

---
 .../sql/zetasql/ZetaSqlCalciteTranslationUtils.java | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlCalciteTranslationUtils.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlCalciteTranslationUtils.java
index 0eaadcac9d8b..6dc3c1e8d482 100644
--- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlCalciteTranslationUtils.java
+++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/ZetaSqlCalciteTranslationUtils.java
@@ -186,12 +186,8 @@ private static List getFieldNameList(List fields) {
     for (int i = 0; i < fields.size(); i++) {
       String name = fields.get(i).getName();
       // Follow the same way that BigQuery handles unspecified or duplicate field name
-      if ("".equals(name) || usedName.contains(name)) {
+      if ("".equals(name) || name.startsWith("_field_") || usedName.contains(name)) {
         name = "_field_" + (i + 1); // BigQuery uses 1-based default field name
-        if (usedName.contains(name)) {
-          throw new UnsupportedOperationException(
-              name + " is not a valid field name. Please use another name for the field.");
-        }
       }
       b.add(name);
       usedName.add(name);