apache · luozenglin · Mar 6, 2023 · Mar 1, 2023 · Mar 1, 2023 · Mar 3, 2023
diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp
@@ -264,16 +264,16 @@ TypeDescriptor::TypeDescriptor(const google::protobuf::RepeatedPtrField<PTypeNod
     }
 }
 
-void TypeDescriptor::add_sub_type(TypeDescriptor&& sub_type, bool&& is_nullable) {
-    children.emplace_back(sub_type);
-    contains_nulls.emplace_back(is_nullable);
+void TypeDescriptor::add_sub_type(TypeDescriptor sub_type, bool is_nullable) {
+    children.push_back(std::move(sub_type));
+    contains_nulls.push_back(is_nullable);
 }
 
-void TypeDescriptor::add_sub_type(TypeDescriptor&& sub_type, std::string&& field_name,
-                                  bool&& is_nullable) {
-    children.emplace_back(sub_type);
-    field_names.emplace_back(field_name);
-    contains_nulls.emplace_back(is_nullable);
+void TypeDescriptor::add_sub_type(TypeDescriptor sub_type, std::string field_name,
+                                  bool is_nullable) {
+    children.push_back(std::move(sub_type));
+    field_names.push_back(std::move(field_name));
+    contains_nulls.push_back(is_nullable);
 }
 
 std::string TypeDescriptor::debug_string() const {

diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h
@@ -64,6 +64,7 @@ struct TypeDescriptor {
     std::vector<std::string> field_names;
 
     // Used for complex types only.
+    // Whether each subtype of a complex type is nullable.
     std::vector<bool> contains_nulls;
 
     TypeDescriptor() : type(INVALID_TYPE), len(-1), precision(-1), scale(-1) {}
@@ -220,11 +221,10 @@ struct TypeDescriptor {
     std::string debug_string() const;
 
     // use to array type and map type add sub type
-    void add_sub_type(TypeDescriptor&& sub_type, bool&& is_nullable = true);
+    void add_sub_type(TypeDescriptor sub_type, bool is_nullable = true);
 
     // use to struct type add sub type
-    void add_sub_type(TypeDescriptor&& sub_type, std::string&& field_name,
-                      bool&& is_nullable = true);
+    void add_sub_type(TypeDescriptor sub_type, std::string field_name, bool is_nullable = true);
 
 private:
     /// Used to create a possibly nested type from the flattened Thrift representation.

diff --git a/be/src/vec/data_types/data_type_nullable.cpp b/be/src/vec/data_types/data_type_nullable.cpp
@@ -68,7 +68,7 @@ void DataTypeNullable::to_string(const IColumn& column, size_t row_num,
     if (col_null.is_null_at(row_num)) {
         ostr.write("NULL", 4);
     } else {
-        get_nested_type()->to_string(col_null, row_num, ostr);
+        get_nested_type()->to_string(col_null.get_nested_column(), row_num, ostr);
     }
 }
 

diff --git a/be/src/vec/exec/format/parquet/schema_desc.cpp b/be/src/vec/exec/format/parquet/schema_desc.cpp
@@ -136,6 +136,7 @@ Status FieldDescriptor::parse_node_field(const std::vector<tparquet::SchemaEleme
         transform(t_schema.name.begin(), t_schema.name.end(), lower_case_name.begin(), ::tolower);
         node_field->name = lower_case_name;
         node_field->type.type = TYPE_ARRAY;
+        node_field->type.add_sub_type(child->type);
         node_field->is_nullable = false;
         _next_schema_pos = curr_pos + 1;
     } else {
@@ -325,6 +326,7 @@ Status FieldDescriptor::parse_group_field(const std::vector<tparquet::SchemaElem
 
         group_field->name = group_schema.name;
         group_field->type.type = TYPE_ARRAY;
+        group_field->type.add_sub_type(struct_field->type);
         group_field->is_nullable = false;
     } else {
         RETURN_IF_ERROR(parse_struct_field(t_schemas, curr_pos, group_field));
@@ -392,7 +394,7 @@ Status FieldDescriptor::parse_list_field(const std::vector<tparquet::SchemaEleme
 
     list_field->name = first_level.name;
     list_field->type.type = TYPE_ARRAY;
-    list_field->type.children.push_back(list_field->children[0].type);
+    list_field->type.add_sub_type(list_field->children[0].type);
     list_field->is_nullable = is_optional;
 
     return Status::OK();
@@ -454,6 +456,7 @@ Status FieldDescriptor::parse_map_field(const std::vector<tparquet::SchemaElemen
 
     map_field->name = map_schema.name;
     map_field->type.type = TYPE_MAP;
+    map_field->type.add_sub_type(map_kv_field->type);
     map_field->is_nullable = is_optional;
 
     return Status::OK();
@@ -477,6 +480,9 @@ Status FieldDescriptor::parse_struct_field(const std::vector<tparquet::SchemaEle
     struct_field->name = struct_schema.name;
     struct_field->is_nullable = is_optional;
     struct_field->type.type = TYPE_STRUCT;
+    for (int i = 0; i < num_children; ++i) {
+        struct_field->type.add_sub_type(struct_field->children[i].type);
+    }
     return Status::OK();
 }
 

diff --git a/regression-test/data/load_p0/stream_load/array_test.parquet b/regression-test/data/load_p0/stream_load/array_test.parquet
diff --git a/regression-test/data/load_p0/stream_load/test_parquet_orc_case.out b/regression-test/data/load_p0/stream_load/test_parquet_orc_case.out
diff --git a/regression-test/suites/load_p0/stream_load/test_parquet_orc_case.groovy b/regression-test/suites/load_p0/stream_load/test_parquet_orc_case.groovy
@@ -206,5 +206,43 @@ suite("test_parquet_orc_case", "p0") {
 
 
     sql """ DROP TABLE IF EXISTS ${tableName} """
+
+    def arrayParquetTbl = "test_array_parquet_tb"
+    sql """ DROP TABLE IF EXISTS ${arrayParquetTbl} """
+
+    sql """
+    CREATE TABLE ${arrayParquetTbl} ( 
+        k1 int NULL, 
+        a1 array<boolean> NULL, 
+        a2 array<tinyint> NULL, 
+        a3 array<smallint> NULL, 
+        a4 array<int> NULL, 
+        a5 array<bigint> NULL, 
+        a6 array<largeint> NULL,
+        a7 array<decimal(27, 7)> NULL, 
+        a8 array<float> NULL, 
+        a9 array<double> NULL, 
+        a10 array<date> NULL, 
+        a11 array<datetime> NULL, 
+        a12 array<char(20)> NULL, 
+        a13 array<varchar(50)> NULL, 
+        a14 array<string> NULL 
+    ) 
+    DUPLICATE KEY(k1) 
+    DISTRIBUTED BY HASH(k1) BUCKETS 5
+    PROPERTIES(
+        "replication_num"="1"
+    );
+    """
+
+    streamLoad {
+        table "${arrayParquetTbl}"
+        set 'format', 'parquet'
+        set 'columns', '`k1`, `a1`, `a2`, `a3`, `a4`, `a5`, `a6`, `a7`, `a8`, `a9`, `a10`, `a11`, `a12`, `a13`, `a14`'
+        file 'array_test.parquet'
+        time 10000 // limit inflight 10s
+    }
+    sql "sync"
+    qt_sql_array_parquet "select * from ${arrayParquetTbl} order by k1 limit 3"
 }