Describe the enhancement requested
We can reserve memory in the builder before running the append loop when reading values.
Also, when the null count is zero, we can skip checking the validity bitmap entirely, since there are no nulls.
|
void ReadValuesSpaced(int64_t values_to_read, int64_t null_count) override { |
|
uint8_t* valid_bits = valid_bits_->mutable_data(); |
|
const int64_t valid_bits_offset = values_written_; |
|
auto values = ValuesHead<FLBA>(); |
|
|
|
int64_t num_decoded = this->current_decoder_->DecodeSpaced( |
|
values, static_cast<int>(values_to_read), static_cast<int>(null_count), |
|
valid_bits, valid_bits_offset); |
|
ARROW_DCHECK_EQ(num_decoded, values_to_read); |
|
|
|
for (int64_t i = 0; i < num_decoded; i++) { |
|
if (::arrow::bit_util::GetBit(valid_bits, valid_bits_offset + i)) { |
|
PARQUET_THROW_NOT_OK(builder_->Append(values[i].ptr)); |
|
} else { |
|
PARQUET_THROW_NOT_OK(builder_->AppendNull()); |
|
} |
|
} |
This situation (a spaced read with a null count of zero) can occur when a batch contains optional fields but no actual nulls; see the check here:
|
// Reports whether the column described by `descr` needs "spaced" value handling.
// NOTE(review): "spaced" presumably means slots are left for nulls in the
// decoded output — confirm against the callers.
//
// Returns true when either:
//   - max_repetition_level() > 0 and the column's schema node is not required, or
//   - max_repetition_level() == 0 and the column's schema node or any node
//     reached through parent() is optional.
// Returns false otherwise.
inline bool HasSpacedValues(const ColumnDescriptor* descr) { |
|
if (descr->max_repetition_level() > 0) { |
|
// repeated+flat case |
|
// Only the column's own schema node is consulted; parents are not inspected.
return !descr->schema_node()->is_required(); |
|
} else { |
|
// non-repeated+nested case |
|
// Find if a node forces nulls in the lowest level along the hierarchy |
|
// Start at the column's own schema node and follow parent() links upward.
const schema::Node* node = descr->schema_node().get(); |
|
while (node) { |
|
// A single optional node anywhere on the chain is enough to answer true.
if (node->is_optional()) { |
|
return true; |
|
} |
|
node = node->parent(); |
|
} |
|
// The chain ended (parent() yielded null) without finding an optional node.
return false; |
|
} |
|
} |
Component(s)
C++, Parquet
Describe the enhancement requested
We can reserve memory in the builder before running the append loop when reading values.
Also, when the null count is zero, we can skip checking the validity bitmap entirely, since there are no nulls.
arrow/cpp/src/parquet/column_reader.cc
Lines 2074 to 2090 in f7286a9
This situation (a spaced read with a null count of zero) can occur when a batch contains optional fields but no actual nulls; see the check here:
arrow/cpp/src/parquet/column_reader.cc
Lines 77 to 93 in ef3797d
Component(s)
C++, Parquet