Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions be/src/runtime/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,16 +264,16 @@ TypeDescriptor::TypeDescriptor(const google::protobuf::RepeatedPtrField<PTypeNod
}
}

void TypeDescriptor::add_sub_type(TypeDescriptor&& sub_type, bool&& is_nullable) {
children.emplace_back(sub_type);
contains_nulls.emplace_back(is_nullable);
void TypeDescriptor::add_sub_type(TypeDescriptor sub_type, bool is_nullable) {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OrcReader already uses this method and passes rvalues as arguments.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This signature also accepts rvalues; if the argument passed in is an rvalue, the overhead is two move operations.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, it looks good. How about the other overload, add_sub_type(TypeDescriptor&& sub_type, std::string&& field_name, bool&& is_nullable)? It feels strange that the parameter forms of the two overloads are not uniform.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

children.push_back(std::move(sub_type));
contains_nulls.push_back(is_nullable);
}

void TypeDescriptor::add_sub_type(TypeDescriptor&& sub_type, std::string&& field_name,
bool&& is_nullable) {
children.emplace_back(sub_type);
field_names.emplace_back(field_name);
contains_nulls.emplace_back(is_nullable);
// Adds a named sub type (used for struct types): appends one entry to each of
// `children`, `field_names` and `contains_nulls`.
// `sub_type` and `field_name` are taken by value and moved into their vectors,
// so callers may pass either lvalues (copied once) or rvalues.
void TypeDescriptor::add_sub_type(TypeDescriptor sub_type, std::string field_name,
                                  bool is_nullable) {
    children.emplace_back(std::move(sub_type));
    field_names.emplace_back(std::move(field_name));
    contains_nulls.emplace_back(is_nullable);
}

std::string TypeDescriptor::debug_string() const {
Expand Down
6 changes: 3 additions & 3 deletions be/src/runtime/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ struct TypeDescriptor {
std::vector<std::string> field_names;

// Used for complex types only.
// Whether the subtypes of a complex type are nullable
std::vector<bool> contains_nulls;

TypeDescriptor() : type(INVALID_TYPE), len(-1), precision(-1), scale(-1) {}
Expand Down Expand Up @@ -220,11 +221,10 @@ struct TypeDescriptor {
std::string debug_string() const;

// Used to add a sub type to an array type or a map type
void add_sub_type(TypeDescriptor&& sub_type, bool&& is_nullable = true);
void add_sub_type(TypeDescriptor sub_type, bool is_nullable = true);

// Used to add a sub type to a struct type
void add_sub_type(TypeDescriptor&& sub_type, std::string&& field_name,
bool&& is_nullable = true);
void add_sub_type(TypeDescriptor sub_type, std::string field_name, bool is_nullable = true);

private:
/// Used to create a possibly nested type from the flattened Thrift representation.
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/data_types/data_type_nullable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ void DataTypeNullable::to_string(const IColumn& column, size_t row_num,
if (col_null.is_null_at(row_num)) {
ostr.write("NULL", 4);
} else {
get_nested_type()->to_string(col_null, row_num, ostr);
get_nested_type()->to_string(col_null.get_nested_column(), row_num, ostr);
}
}

Expand Down
8 changes: 7 additions & 1 deletion be/src/vec/exec/format/parquet/schema_desc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ Status FieldDescriptor::parse_node_field(const std::vector<tparquet::SchemaEleme
transform(t_schema.name.begin(), t_schema.name.end(), lower_case_name.begin(), ::tolower);
node_field->name = lower_case_name;
node_field->type.type = TYPE_ARRAY;
node_field->type.add_sub_type(child->type);
node_field->is_nullable = false;
_next_schema_pos = curr_pos + 1;
} else {
Expand Down Expand Up @@ -325,6 +326,7 @@ Status FieldDescriptor::parse_group_field(const std::vector<tparquet::SchemaElem

group_field->name = group_schema.name;
group_field->type.type = TYPE_ARRAY;
group_field->type.add_sub_type(struct_field->type);
group_field->is_nullable = false;
} else {
RETURN_IF_ERROR(parse_struct_field(t_schemas, curr_pos, group_field));
Expand Down Expand Up @@ -392,7 +394,7 @@ Status FieldDescriptor::parse_list_field(const std::vector<tparquet::SchemaEleme

list_field->name = first_level.name;
list_field->type.type = TYPE_ARRAY;
list_field->type.children.push_back(list_field->children[0].type);
list_field->type.add_sub_type(list_field->children[0].type);
list_field->is_nullable = is_optional;

return Status::OK();
Expand Down Expand Up @@ -454,6 +456,7 @@ Status FieldDescriptor::parse_map_field(const std::vector<tparquet::SchemaElemen

map_field->name = map_schema.name;
map_field->type.type = TYPE_MAP;
map_field->type.add_sub_type(map_kv_field->type);
map_field->is_nullable = is_optional;

return Status::OK();
Expand All @@ -477,6 +480,9 @@ Status FieldDescriptor::parse_struct_field(const std::vector<tparquet::SchemaEle
struct_field->name = struct_schema.name;
struct_field->is_nullable = is_optional;
struct_field->type.type = TYPE_STRUCT;
for (int i = 0; i < num_children; ++i) {
struct_field->type.add_sub_type(struct_field->children[i].type);
}
return Status::OK();
}

Expand Down
Binary file not shown.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -206,5 +206,43 @@ suite("test_parquet_orc_case", "p0") {


sql """ DROP TABLE IF EXISTS ${tableName} """

def arrayParquetTbl = "test_array_parquet_tb"
sql """ DROP TABLE IF EXISTS ${arrayParquetTbl} """

sql """
CREATE TABLE ${arrayParquetTbl} (
k1 int NULL,
a1 array<boolean> NULL,
a2 array<tinyint> NULL,
a3 array<smallint> NULL,
a4 array<int> NULL,
a5 array<bigint> NULL,
a6 array<largeint> NULL,
a7 array<decimal(27, 7)> NULL,
a8 array<float> NULL,
a9 array<double> NULL,
a10 array<date> NULL,
a11 array<datetime> NULL,
a12 array<char(20)> NULL,
a13 array<varchar(50)> NULL,
a14 array<string> NULL
)
DUPLICATE KEY(k1)
DISTRIBUTED BY HASH(k1) BUCKETS 5
PROPERTIES(
"replication_num"="1"
);
"""

streamLoad {
table "${arrayParquetTbl}"
set 'format', 'parquet'
set 'columns', '`k1`, `a1`, `a2`, `a3`, `a4`, `a5`, `a6`, `a7`, `a8`, `a9`, `a10`, `a11`, `a12`, `a13`, `a14`'
file 'array_test.parquet'
time 10000 // limit inflight 10s
}
sql "sync"
qt_sql_array_parquet "select * from ${arrayParquetTbl} order by k1 limit 3"
}