diff --git a/be/src/exec/operator/olap_scan_operator.cpp b/be/src/exec/operator/olap_scan_operator.cpp index 4d401a78100b5f..4cb9a7808ae63f 100644 --- a/be/src/exec/operator/olap_scan_operator.cpp +++ b/be/src/exec/operator/olap_scan_operator.cpp @@ -224,6 +224,8 @@ Status OlapScanLocalState::_init_profile() { _lazy_read_seek_timer = ADD_TIMER(_segment_profile, "LazyReadSeekTime"); _lazy_read_seek_counter = ADD_COUNTER(_segment_profile, "LazyReadSeekCount", TUnit::UNIT); + _lazy_read_pruned_timer = ADD_TIMER(_segment_profile, "LazyReadPrunedTime"); + _output_col_timer = ADD_TIMER(_segment_profile, "OutputColumnTime"); _stats_filtered_counter = ADD_COUNTER(_segment_profile, "RowsStatsFiltered", TUnit::UNIT); diff --git a/be/src/exec/operator/olap_scan_operator.h b/be/src/exec/operator/olap_scan_operator.h index 344dd604db1965..c90aefc298e80d 100644 --- a/be/src/exec/operator/olap_scan_operator.h +++ b/be/src/exec/operator/olap_scan_operator.h @@ -207,6 +207,7 @@ class OlapScanLocalState final : public ScanLocalState { RuntimeProfile::Counter* _lazy_read_timer = nullptr; RuntimeProfile::Counter* _lazy_read_seek_timer = nullptr; RuntimeProfile::Counter* _lazy_read_seek_counter = nullptr; + RuntimeProfile::Counter* _lazy_read_pruned_timer = nullptr; // total pages read // used by segment v2 diff --git a/be/src/exec/scan/olap_scanner.cpp b/be/src/exec/scan/olap_scanner.cpp index 320976814679b9..f60b60ac31f7a4 100644 --- a/be/src/exec/scan/olap_scanner.cpp +++ b/be/src/exec/scan/olap_scanner.cpp @@ -922,6 +922,7 @@ void OlapScanner::_collect_profile_before_close() { COUNTER_UPDATE(local_state->_predicate_column_read_seek_counter, stats.predicate_column_read_seek_num); COUNTER_UPDATE(local_state->_lazy_read_timer, stats.lazy_read_ns); + COUNTER_UPDATE(local_state->_lazy_read_pruned_timer, stats.lazy_read_pruned_ns); COUNTER_UPDATE(local_state->_lazy_read_seek_timer, stats.block_lazy_read_seek_ns); COUNTER_UPDATE(local_state->_lazy_read_seek_counter, 
stats.block_lazy_read_seek_num); COUNTER_UPDATE(local_state->_output_col_timer, stats.output_col_ns); diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index a6c2839c0d3e0f..2a58dc31fc9247 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -617,6 +617,11 @@ class RuntimeState { _query_options.enable_aggregate_function_null_v2; } + bool enable_prune_nested_column() const { + return _query_options.__isset.enable_prune_nested_column && + _query_options.enable_prune_nested_column; + } + bool is_read_csv_empty_line_as_null() const { return _query_options.__isset.read_csv_empty_line_as_null && _query_options.read_csv_empty_line_as_null; diff --git a/be/src/storage/olap_common.h b/be/src/storage/olap_common.h index 2a8893a1b0e939..807e4c2d57b7f8 100644 --- a/be/src/storage/olap_common.h +++ b/be/src/storage/olap_common.h @@ -332,6 +332,7 @@ struct OlapReaderStatistics { int64_t lazy_read_ns = 0; int64_t block_lazy_read_seek_num = 0; int64_t block_lazy_read_seek_ns = 0; + int64_t lazy_read_pruned_ns = 0; int64_t raw_rows_read = 0; diff --git a/be/src/storage/segment/column_reader.cpp b/be/src/storage/segment/column_reader.cpp index ebb1887c8ee920..371adf802e8f88 100644 --- a/be/src/storage/segment/column_reader.cpp +++ b/be/src/storage/segment/column_reader.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "common/compiler_util.h" // IWYU pragma: keep @@ -89,6 +90,26 @@ inline bool read_as_string(PrimitiveType type) { type == PrimitiveType::TYPE_BITMAP || type == PrimitiveType::TYPE_FIXED_LENGTH_OBJECT; } +bool is_current_level_meta_access_path(const TColumnAccessPath& path) { + if (path.data_access_path.path.size() != 1) { + return false; + } + const auto& component = path.data_access_path.path[0]; + return StringCaseEqual()(component, ColumnIterator::ACCESS_OFFSET) || + StringCaseEqual()(component, ColumnIterator::ACCESS_NULL); +} + +bool is_current_level_data_access_path(const 
TColumnAccessPath& path, + const std::string& column_name) { + return path.data_access_path.path.size() == 1 && + StringCaseEqual()(path.data_access_path.path[0], column_name); +} + +void remove_current_level_meta_access_paths(TColumnAccessPaths& paths) { + auto removed = std::ranges::remove_if(paths, is_current_level_meta_access_path); + paths.erase(removed.begin(), removed.end()); +} + Status ColumnReader::create_array(const ColumnReaderOptions& opts, const ColumnMetaPB& meta, const io::FileReaderSPtr& file_reader, std::shared_ptr* reader) { @@ -853,14 +874,45 @@ Status ColumnReader::new_struct_iterator(ColumnIteratorUPtr* iterator, return Status::OK(); } +void ColumnIterator::_convert_to_place_holder_column(MutableColumnPtr& dst, size_t count) { + if (_read_phase == ReadPhase::LAZY) { + return; + } else if (_read_requirement == ReadRequirement::LAZY_OUTPUT && + _read_phase == ReadPhase::PREDICATE) { + // This branch is for non-predicate columns that still have to appear in the + // predicate-phase block so row filtering can keep all block columns aligned. + // Columns already marked PREDICATE are read normally, and SKIP/NORMAL + // columns do not participate in lazy materialization. + _has_place_holder_column = true; + } + + dst->insert_many_defaults(count); +} + +void ColumnIterator::_recovery_from_place_holder_column(MutableColumnPtr& dst) { + if (_read_phase == ReadPhase::LAZY && _has_place_holder_column) { + dst->clear(); + _has_place_holder_column = false; + } +} + Result ColumnIterator::_get_sub_access_paths( - const TColumnAccessPaths& access_paths) { - TColumnAccessPaths sub_access_paths = access_paths; + TColumnAccessPaths sub_access_paths, bool is_predicate) { + // Access paths passed to a complex iterator always start with the current + // column name. Strip that component and return the remaining child-relative + // paths to the caller. 
For example, when this iterator is for column `s`, + // path `s.a.b` is converted to `a.b` and then dispatched to child `a`. + // + // If stripping the current column consumes the whole path, the current + // iterator itself is requested rather than one of its children. Mark the + // current iterator according to the path source: predicate paths must be read + // in the predicate phase, while all/output paths become lazy output targets. + // Empty or mismatched paths indicate an FE/BE access-path contract violation. for (auto it = sub_access_paths.begin(); it != sub_access_paths.end();) { TColumnAccessPath& name_path = *it; if (name_path.data_access_path.path.empty()) { - return ResultError( - Status::InternalError("Invalid access path for struct column: path is empty")); + return ResultError(Status::InternalError( + "Invalid access path for column '{}': path is empty", _column_name)); } if (!StringCaseEqual()(name_path.data_access_path.path[0], _column_name)) { @@ -873,7 +925,11 @@ Result ColumnIterator::_get_sub_access_paths( if (!name_path.data_access_path.path.empty()) { ++it; } else { - set_need_to_read(); + if (is_predicate) { + set_read_requirement(ReadRequirement::PREDICATE); + } else { + set_lazy_output_requirement(); + } it = sub_access_paths.erase(it); } } @@ -896,7 +952,7 @@ MapFileColumnIterator::MapFileColumnIterator(std::shared_ptr reade } Status MapFileColumnIterator::init(const ColumnIteratorOptions& opts) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (_read_requirement == ReadRequirement::SKIP) { DLOG(INFO) << "Map column iterator column " << _column_name << " skip reading."; return Status::OK(); } @@ -910,7 +966,7 @@ Status MapFileColumnIterator::init(const ColumnIteratorOptions& opts) { } Status MapFileColumnIterator::seek_to_ordinal(ordinal_t ord) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (!need_to_read()) { DLOG(INFO) << "Map column iterator column " << _column_name << " skip reading."; return Status::OK(); } @@ 
-928,7 +984,7 @@ Status MapFileColumnIterator::seek_to_ordinal(ordinal_t ord) { } RETURN_IF_ERROR(_offsets_iterator->seek_to_ordinal(ord)); if (read_offset_only()) { - // In OFFSET_ONLY mode, key/value iterators are SKIP_READING, no need to seek them + // In OFFSET_ONLY mode, key/value iterators are SKIP, no need to seek them return Status::OK(); } // here to use offset info @@ -952,23 +1008,37 @@ Status MapFileColumnIterator::init_prefetcher(const SegmentPrefetchParams& param void MapFileColumnIterator::collect_prefetchers( std::map>& prefetchers, PrefetcherInitMethod init_method) { - _offsets_iterator->collect_prefetchers(prefetchers, init_method); + if (!need_to_read()) { + return; + } + if (!read_null_map_only()) { + _offsets_iterator->collect_prefetchers(prefetchers, init_method); + } if (_map_reader->is_nullable()) { _null_iterator->collect_prefetchers(prefetchers, init_method); } + if (read_offset_only() || read_null_map_only()) { + return; + } // the actual data pages to read of key/value column depends on the read result of offset column, // so we can't init prefetch blocks according to rowids, just prefetch all data blocks here. 
- _key_iterator->collect_prefetchers(prefetchers, PrefetcherInitMethod::ALL_DATA_BLOCKS); - _val_iterator->collect_prefetchers(prefetchers, PrefetcherInitMethod::ALL_DATA_BLOCKS); + if (_key_iterator->need_to_read()) { + _key_iterator->collect_prefetchers(prefetchers, PrefetcherInitMethod::ALL_DATA_BLOCKS); + } + if (_val_iterator->need_to_read()) { + _val_iterator->collect_prefetchers(prefetchers, PrefetcherInitMethod::ALL_DATA_BLOCKS); + } } Status MapFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool* has_null) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (!need_to_read()) { DLOG(INFO) << "Map column iterator column " << _column_name << " skip reading."; - dst->insert_many_defaults(*n); + _convert_to_place_holder_column(dst, *n); return Status::OK(); } + _recovery_from_place_holder_column(dst); + if (read_null_map_only()) { // NULL_MAP_ONLY mode: read null map, fill nested ColumnMap with empty defaults DORIS_CHECK(is_column_nullable(*dst)); @@ -996,13 +1066,33 @@ Status MapFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool* auto& column_map = assert_cast( is_column_nullable(*dst) ? static_cast(*dst).get_nested_column() : *dst); - auto column_offsets_ptr = IColumn::mutate(std::move(column_map.get_offsets_ptr())); + const bool read_meta_columns = need_to_read_meta_columns(); + MutableColumnPtr column_offsets_ptr; + if (read_meta_columns) { + column_offsets_ptr = IColumn::mutate(std::move(column_map.get_offsets_ptr())); + } else { + // The parent offsets were already materialized in the predicate phase, so + // they must not be appended to dst again. We still read offsets into a + // temporary column here: this sequential path may be serving a nested + // lazy read after seek_to_ordinal(), and the storage offsets are needed to + // compute how many key/value elements to read from the current source + // ordinal. 
The existing dst offsets only describe the filtered output + // shape and do not track the current source ordinal consumed by this + // iterator call. + const auto base_offset = + column_map.get_offsets().empty() ? 0 : column_map.get_offsets().back(); + column_offsets_ptr = ColumnMap::COffsets::create(); + assert_cast(*column_offsets_ptr) + .insert_value(base_offset); + } Defer defer_offsets {[&] { - auto typed_column_offsets_ptr = ColumnMap::COffsets::cast_to_column_mutptr( - assert_cast( - column_offsets_ptr.get())); - column_offsets_ptr = nullptr; - column_map.get_offsets_ptr() = std::move(typed_column_offsets_ptr); + if (read_meta_columns) { + auto typed_column_offsets_ptr = ColumnMap::COffsets::cast_to_column_mutptr( + assert_cast( + column_offsets_ptr.get())); + column_offsets_ptr = nullptr; + column_map.get_offsets_ptr() = std::move(typed_column_offsets_ptr); + } }}; bool offsets_has_null = false; ssize_t start = column_offsets_ptr->size(); @@ -1035,7 +1125,7 @@ Status MapFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool* } } - if (is_column_nullable(*dst)) { + if (is_column_nullable(*dst) && read_meta_columns) { size_t num_read = *n; auto null_map_ptr = static_cast(*dst).get_null_map_column_ptr(); // in not-null to null linked-schemachange mode, @@ -1057,12 +1147,14 @@ Status MapFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool* Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count, MutableColumnPtr& dst) { - if (_reading_flag == ReadingFlag::SKIP_READING) { - DLOG(INFO) << "File column iterator column " << _column_name << " skip reading."; - dst->insert_many_defaults(count); + if (!need_to_read()) { + DLOG(INFO) << "Map column iterator column " << _column_name << " skip reading."; + _convert_to_place_holder_column(dst, count); return Status::OK(); } + _recovery_from_place_holder_column(dst); + if (read_null_map_only()) { // NULL_MAP_ONLY mode: read null map by rowids, fill nested 
ColumnMap with empty defaults DORIS_CHECK(is_column_nullable(*dst)); @@ -1086,48 +1178,80 @@ Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t if (count == 0) { return Status::OK(); } + // resolve ColumnMap and nullable wrapper auto& column_map = assert_cast( is_column_nullable(*dst) ? static_cast(*dst).get_nested_column() : *dst); - auto offsets_ptr = IColumn::mutate(std::move(column_map.get_offsets_ptr())); + const bool read_meta_columns = need_to_read_meta_columns(); + MutableColumnPtr offsets_ptr; + if (read_meta_columns) { + offsets_ptr = IColumn::mutate(std::move(column_map.get_offsets_ptr())); + } else { + const auto base_offset = + column_map.get_offsets().empty() ? 0 : column_map.get_offsets().back(); + offsets_ptr = ColumnMap::COffsets::create(); + assert_cast(*offsets_ptr) + .insert_value(base_offset); + } Defer defer_offsets {[&] { - auto typed_offsets_ptr = ColumnMap::COffsets::cast_to_column_mutptr( - assert_cast(offsets_ptr.get())); - offsets_ptr = nullptr; - column_map.get_offsets_ptr() = std::move(typed_offsets_ptr); + if (read_meta_columns) { + auto typed_offsets_ptr = ColumnMap::COffsets::cast_to_column_mutptr( + assert_cast( + offsets_ptr.get())); + offsets_ptr = nullptr; + column_map.get_offsets_ptr() = std::move(typed_offsets_ptr); + } }}; auto& offsets = static_cast(*offsets_ptr); size_t base = offsets.get_data().empty() ? 0 : offsets.get_data().back(); // 1. bulk read null-map if nullable std::vector null_mask; // 0: not null, 1: null - if (_map_reader->is_nullable()) { - // For nullable map columns, the destination column must also be nullable. + if (read_meta_columns) { + if (_map_reader->is_nullable()) { + // For nullable map columns, the destination column must also be nullable. 
+ if (UNLIKELY(!is_column_nullable(*dst))) { + return Status::InternalError( + "unexpected non-nullable destination column for nullable map reader"); + } + MutableColumnPtr null_map_ptr = + static_cast(*dst).get_null_map_column_ptr(); + size_t null_before = null_map_ptr->size(); + RETURN_IF_ERROR(_null_iterator->read_by_rowids(rowids, count, null_map_ptr)); + // extract a light-weight view to decide element reads + auto& null_map_col = assert_cast(*null_map_ptr); + const auto* src = null_map_col.get_data().data() + null_before; + null_mask.assign(src, src + count); + } else if (is_column_nullable(*dst)) { + // In not-null to null linked-schemachange mode, + // the data files, including the meta in the footer, are not changed, + // so dst may come from the changed (nullable) meta while the old data is not nullable; + // if so, fill null_map with 0 (not null) for every row by default. + MutableColumnPtr null_map_ptr = + static_cast(*dst).get_null_map_column_ptr(); + auto& null_map = assert_cast(*null_map_ptr); + null_map.insert_many_vals(0, count); + } + } else if (_map_reader->is_nullable()) { + // In lazy mode the parent null map has already been materialized during + // predicate read and filtered together with the block. Reuse that dst + // null map to avoid re-reading the same meta column from storage.
if (UNLIKELY(!is_column_nullable(*dst))) { return Status::InternalError( "unexpected non-nullable destination column for nullable map reader"); } - auto null_map_ptr = static_cast(*dst).get_null_map_column_ptr(); - size_t null_before = null_map_ptr->size(); - auto* null_map_col = null_map_ptr.get(); - MutableColumnPtr null_map_column = std::move(null_map_ptr); - RETURN_IF_ERROR(_null_iterator->read_by_rowids(rowids, count, null_map_column)); - // extract a light-weight view to decide element reads - null_mask.reserve(count); - for (size_t i = 0; i < count; ++i) { - null_mask.push_back(null_map_col->get_element(null_before + i)); - } - } else if (is_column_nullable(*dst)) { - // in not-null to null linked-schemachange mode, - // actually we do not change dat data include meta in footer, - // so may dst from changed meta which is nullable but old data is not nullable, - // if so, we should set null_map to all null by default - auto null_map_ptr = static_cast(*dst).get_null_map_column_ptr(); - null_map_ptr->insert_many_vals(0, count); + const auto& null_map_col = static_cast(*dst).get_null_map_column(); + DORIS_CHECK(null_map_col.size() == count); + const auto* src = null_map_col.get_data().data(); + null_mask.assign(src, src + count); } - // 2. bulk read start ordinals for requested rows + // 2. Bulk read source start ordinals for requested rows. The offsets stored + // in dst already describe the filtered output shape when read_meta_columns is + // false, but they do not contain the source key/value ordinal for each + // selected rowid. We still need the storage offsets here to seek child + // iterators to the correct source element ranges. 
MutableColumnPtr starts_col = ColumnOffset64::create(); starts_col->reserve(count); RETURN_IF_ERROR(_offsets_iterator->read_by_rowids(rowids, count, starts_col)); @@ -1167,16 +1291,19 @@ Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t auto& next_starts_data = assert_cast(*next_starts_col).get_data(); std::vector sizes(count, 0); size_t acc = base; - const auto original_size = offsets.get_data().back(); - offsets.get_data().reserve(offsets.get_data().size() + count); + if (read_meta_columns) { + offsets.get_data().reserve(offsets.get_data().size() + count); + } for (size_t i = 0; i < count; ++i) { - size_t sz = static_cast(next_starts_data[i] - starts_data[i]); + auto sz = static_cast(next_starts_data[i] - starts_data[i]); if (_map_reader->is_nullable() && !null_mask.empty() && null_mask[i]) { sz = 0; // null rows do not consume elements } sizes[i] = sz; acc += sz; - offsets.get_data().push_back(acc); + if (read_meta_columns) { + offsets.get_data().push_back(acc); + } } // 6. 
read key/value elements for non-empty sizes @@ -1199,18 +1326,14 @@ Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t bool dummy_has_null = false; if (this_run != 0) { - if (_key_iterator->reading_flag() != ReadingFlag::SKIP_READING) { - RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(start_idx)); - RETURN_IF_ERROR(_key_iterator->next_batch(&n, keys_ptr, &dummy_has_null)); - DCHECK(n == this_run); - } - - if (_val_iterator->reading_flag() != ReadingFlag::SKIP_READING) { - n = this_run; - RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(start_idx)); - RETURN_IF_ERROR(_val_iterator->next_batch(&n, vals_ptr, &dummy_has_null)); - DCHECK(n == this_run); - } + RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(start_idx)); + RETURN_IF_ERROR(_key_iterator->next_batch(&n, keys_ptr, &dummy_has_null)); + DCHECK(n == this_run); + + n = this_run; + RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(start_idx)); + RETURN_IF_ERROR(_val_iterator->next_batch(&n, vals_ptr, &dummy_has_null)); + DCHECK(n == this_run); } start_idx = start; this_run = sz; @@ -1223,36 +1346,24 @@ Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t } size_t n = this_run; - const size_t total_count = offsets.get_data().back() - original_size; bool dummy_has_null = false; - if (_key_iterator->reading_flag() != ReadingFlag::SKIP_READING) { - if (this_run != 0) { - RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(start_idx)); - RETURN_IF_ERROR(_key_iterator->next_batch(&n, keys_ptr, &dummy_has_null)); - DCHECK(n == this_run); - } - } else { - keys_ptr->insert_many_defaults(total_count); - } + if (this_run != 0) { + RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(start_idx)); + RETURN_IF_ERROR(_key_iterator->next_batch(&n, keys_ptr, &dummy_has_null)); + DCHECK(n == this_run); - if (_val_iterator->reading_flag() != ReadingFlag::SKIP_READING) { - if (this_run != 0) { - n = this_run; - RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(start_idx)); - 
RETURN_IF_ERROR(_val_iterator->next_batch(&n, vals_ptr, &dummy_has_null)); - DCHECK(n == this_run); - } - } else { - vals_ptr->insert_many_defaults(total_count); + n = this_run; + RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(start_idx)); + RETURN_IF_ERROR(_val_iterator->next_batch(&n, vals_ptr, &dummy_has_null)); + DCHECK(n == this_run); } - return Status::OK(); } -void MapFileColumnIterator::set_need_to_read() { - set_reading_flag(ReadingFlag::NEED_TO_READ); - _key_iterator->set_need_to_read(); - _val_iterator->set_need_to_read(); +void MapFileColumnIterator::set_lazy_output_requirement() { + set_read_requirement_self(ReadRequirement::LAZY_OUTPUT); + _key_iterator->set_lazy_output_requirement(); + _val_iterator->set_lazy_output_requirement(); } void MapFileColumnIterator::remove_pruned_sub_iterators() { @@ -1262,37 +1373,65 @@ void MapFileColumnIterator::remove_pruned_sub_iterators() { Status MapFileColumnIterator::set_access_paths(const TColumnAccessPaths& all_access_paths, const TColumnAccessPaths& predicate_access_paths) { - if (all_access_paths.empty()) { + if (all_access_paths.empty() && predicate_access_paths.empty()) { return Status::OK(); } + const auto requirement_before_access_path = _read_requirement; if (!predicate_access_paths.empty()) { - set_reading_flag(ReadingFlag::READING_FOR_PREDICATE); - DLOG(INFO) << "Map column iterator set sub-column " << _column_name - << " to READING_FOR_PREDICATE"; + set_read_requirement_self(ReadRequirement::PREDICATE); + DLOG(INFO) << "Map column iterator set sub-column " << _column_name << " to PREDICATE"; } + const bool has_current_level_data_path = + std::ranges::any_of(all_access_paths, [this](const TColumnAccessPath& path) { + return is_current_level_data_access_path(path, _column_name); + }); auto sub_all_access_paths = DORIS_TRY(_get_sub_access_paths(all_access_paths)); - auto sub_predicate_access_paths = DORIS_TRY(_get_sub_access_paths(predicate_access_paths)); - - if (sub_all_access_paths.empty()) { - return 
Status::OK(); + auto sub_predicate_access_paths = + DORIS_TRY(_get_sub_access_paths(predicate_access_paths, true)); + if (has_current_level_data_path) { + remove_current_level_meta_access_paths(sub_all_access_paths); + } + const bool has_current_level_predicate_meta_path = + std::ranges::any_of(sub_predicate_access_paths, is_current_level_meta_access_path); + // Current-level predicate metadata paths are consumed by this map iterator and must not be + // forwarded to key/value children. The FE keeps all_access_paths as a superset of predicate + // paths, so meta-only mode is still decided from sub_all_access_paths below. + remove_current_level_meta_access_paths(sub_predicate_access_paths); + + if (sub_predicate_access_paths.empty() && _read_requirement == ReadRequirement::PREDICATE && + !has_current_level_predicate_meta_path) { + // if no sub-column in predicate_access_paths, but current column is PREDICATE, + // then we should set key/value iterator to PREDICATE too. + _key_iterator->set_read_requirement(ReadRequirement::PREDICATE); + _val_iterator->set_read_requirement(ReadRequirement::PREDICATE); + } + + if (sub_predicate_access_paths.empty()) { + // Check for meta-only modes (OFFSET_ONLY or NULL_MAP_ONLY). Only skip key/value + // iterators when no predicate sub-path needs them in the predicate phase. 
+ _check_and_set_meta_read_mode(requirement_before_access_path, sub_all_access_paths); + if (read_offset_only()) { + _key_iterator->set_read_requirement(ReadRequirement::SKIP); + _val_iterator->set_read_requirement(ReadRequirement::SKIP); + DLOG(INFO) << "Map column iterator set column " << _column_name + << " to OFFSET_ONLY meta read mode, key/value columns set to SKIP"; + return Status::OK(); + } + if (read_null_map_only()) { + _key_iterator->set_read_requirement(ReadRequirement::SKIP); + _val_iterator->set_read_requirement(ReadRequirement::SKIP); + DLOG(INFO) << "Map column iterator set column " << _column_name + << " to NULL_MAP_ONLY meta read mode, key/value columns set to SKIP"; + return Status::OK(); + } } - // Check for meta-only modes (OFFSET_ONLY or NULL_MAP_ONLY) - _check_and_set_meta_read_mode(sub_all_access_paths); - if (read_offset_only()) { - _key_iterator->set_reading_flag(ReadingFlag::SKIP_READING); - _val_iterator->set_reading_flag(ReadingFlag::SKIP_READING); - DLOG(INFO) << "Map column iterator set column " << _column_name - << " to OFFSET_ONLY reading mode, key/value columns set to SKIP_READING"; - return Status::OK(); - } - if (read_null_map_only()) { - _key_iterator->set_reading_flag(ReadingFlag::SKIP_READING); - _val_iterator->set_reading_flag(ReadingFlag::SKIP_READING); - DLOG(INFO) << "Map column iterator set column " << _column_name - << " to NULL_MAP_ONLY reading mode, key/value columns set to SKIP_READING"; + // A current-level data path is consumed by _get_sub_access_paths() and leaves + // sub_all_access_paths empty after marking key/value as lazy-read targets. Predicate + // sub-paths still have to be routed to child iterators for the predicate phase. 
+ if (sub_all_access_paths.empty() && sub_predicate_access_paths.empty()) { return Status::OK(); } @@ -1324,9 +1463,6 @@ Status MapFileColumnIterator::set_access_paths(const TColumnAccessPaths& all_acc val_all_access_paths.emplace_back(paths); } } - const auto need_read_keys = !key_all_access_paths.empty(); - const auto need_read_values = !val_all_access_paths.empty(); - for (auto paths : sub_predicate_access_paths) { if (paths.data_access_path.path[0] == ACCESS_ALL) { // Same logic as above: key needs full data, value gets the sub-path. @@ -1347,26 +1483,65 @@ Status MapFileColumnIterator::set_access_paths(const TColumnAccessPaths& all_acc } } + const auto need_read_keys = + !key_all_access_paths.empty() || !key_predicate_access_paths.empty(); + const auto need_read_values = + !val_all_access_paths.empty() || !val_predicate_access_paths.empty(); + if (need_read_keys) { - _key_iterator->set_reading_flag(ReadingFlag::NEED_TO_READ); RETURN_IF_ERROR( _key_iterator->set_access_paths(key_all_access_paths, key_predicate_access_paths)); + // Apply LAZY_OUTPUT after child predicate paths have been handled. Read requirements are + // monotonic, so a predicate-only child already promoted to PREDICATE will not + // be downgraded, while a non-predicate child becomes a lazy materialization target. + _key_iterator->set_read_requirement_self(ReadRequirement::LAZY_OUTPUT); } else { - _key_iterator->set_reading_flag(ReadingFlag::SKIP_READING); - DLOG(INFO) << "Map column iterator set key column to SKIP_READING"; + _key_iterator->set_read_requirement(ReadRequirement::SKIP); + DLOG(INFO) << "Map column iterator set key column to SKIP"; } if (need_read_values) { - _val_iterator->set_reading_flag(ReadingFlag::NEED_TO_READ); RETURN_IF_ERROR( _val_iterator->set_access_paths(val_all_access_paths, val_predicate_access_paths)); + // Same as keys: predicate-only value paths stay PREDICATE because this + // post-processing update cannot lower a stronger child requirement. 
+ _val_iterator->set_read_requirement_self(ReadRequirement::LAZY_OUTPUT); } else { - _val_iterator->set_reading_flag(ReadingFlag::SKIP_READING); - DLOG(INFO) << "Map column iterator set value column to SKIP_READING"; + _val_iterator->set_read_requirement(ReadRequirement::SKIP); + DLOG(INFO) << "Map column iterator set value column to SKIP"; } return Status::OK(); } +void MapFileColumnIterator::set_read_phase(ReadPhase mode) { + ColumnIterator::set_read_phase(mode); + _key_iterator->set_read_phase(mode); + _val_iterator->set_read_phase(mode); +} + +void MapFileColumnIterator::finalize_lazy_phase(MutableColumnPtr& dst) { + _recovery_from_place_holder_column(dst); + auto& map_column = assert_cast( + dst->is_nullable() ? static_cast(*dst).get_nested_column() : *dst); + auto keys_ptr = IColumn::mutate(std::move(map_column.get_keys_ptr())); + auto vals_ptr = IColumn::mutate(std::move(map_column.get_values_ptr())); + _key_iterator->finalize_lazy_phase(keys_ptr); + _val_iterator->finalize_lazy_phase(vals_ptr); + map_column.get_keys_ptr() = std::move(keys_ptr); + map_column.get_values_ptr() = std::move(vals_ptr); +} + +void MapFileColumnIterator::set_read_requirement(ReadRequirement requirement) { + set_read_requirement_self(requirement); + _key_iterator->set_read_requirement(requirement); + _val_iterator->set_read_requirement(requirement); +} + +bool MapFileColumnIterator::has_lazy_read_target() const { + return _read_requirement == ReadRequirement::LAZY_OUTPUT || + _key_iterator->has_lazy_read_target() || _val_iterator->has_lazy_read_target(); +} + //////////////////////////////////////////////////////////////////////////////// StructFileColumnIterator::StructFileColumnIterator( @@ -1379,7 +1554,7 @@ StructFileColumnIterator::StructFileColumnIterator( } Status StructFileColumnIterator::init(const ColumnIteratorOptions& opts) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (_read_requirement == ReadRequirement::SKIP) { DLOG(INFO) << "Struct column iterator 
column " << _column_name << " skip reading."; return Status::OK(); } @@ -1394,12 +1569,14 @@ Status StructFileColumnIterator::init(const ColumnIteratorOptions& opts) { } Status StructFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool* has_null) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (!need_to_read()) { DLOG(INFO) << "Struct column iterator column " << _column_name << " skip reading."; - dst->insert_many_defaults(*n); + _convert_to_place_holder_column(dst, *n); return Status::OK(); } + _recovery_from_place_holder_column(dst); + if (read_null_map_only()) { // NULL_MAP_ONLY mode: read null map, fill nested ColumnStruct with empty defaults DORIS_CHECK(is_column_nullable(*dst)); @@ -1438,7 +1615,7 @@ Status StructFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bo DCHECK(num_read == *n); } - if (is_column_nullable(*dst)) { + if (is_column_nullable(*dst) && need_to_read_meta_columns()) { size_t num_read = *n; auto null_map_ptr = static_cast(*dst).get_null_map_column_ptr(); // in not-null to null linked-schemachange mode, @@ -1460,7 +1637,7 @@ Status StructFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bo } Status StructFileColumnIterator::seek_to_ordinal(ordinal_t ord) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (!need_to_read()) { DLOG(INFO) << "Struct column iterator column " << _column_name << " skip reading."; return Status::OK(); } @@ -1476,7 +1653,8 @@ Status StructFileColumnIterator::seek_to_ordinal(ordinal_t ord) { for (auto& column_iterator : _sub_column_iterators) { RETURN_IF_ERROR(column_iterator->seek_to_ordinal(ord)); } - if (_struct_reader->is_nullable()) { + + if (_struct_reader->is_nullable() && need_to_read_meta_columns()) { RETURN_IF_ERROR(_null_iterator->seek_to_ordinal(ord)); } return Status::OK(); @@ -1495,22 +1673,32 @@ Status StructFileColumnIterator::init_prefetcher(const SegmentPrefetchParams& pa void StructFileColumnIterator::collect_prefetchers( std::map>& 
prefetchers, PrefetcherInitMethod init_method) { - for (auto& column_iterator : _sub_column_iterators) { - column_iterator->collect_prefetchers(prefetchers, init_method); + if (!need_to_read()) { + return; } if (_struct_reader->is_nullable()) { _null_iterator->collect_prefetchers(prefetchers, init_method); } + if (read_null_map_only()) { + return; + } + for (auto& column_iterator : _sub_column_iterators) { + if (column_iterator->need_to_read()) { + column_iterator->collect_prefetchers(prefetchers, init_method); + } + } } Status StructFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count, MutableColumnPtr& dst) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (!need_to_read()) { DLOG(INFO) << "Struct column iterator column " << _column_name << " skip reading."; - dst->insert_many_defaults(count); + _convert_to_place_holder_column(dst, count); return Status::OK(); } + _recovery_from_place_holder_column(dst); + if (count == 0) { return Status::OK(); } @@ -1541,17 +1729,17 @@ Status StructFileColumnIterator::read_by_rowids(const rowid_t* rowids, const siz return Status::OK(); } -void StructFileColumnIterator::set_need_to_read() { - set_reading_flag(ReadingFlag::NEED_TO_READ); +void StructFileColumnIterator::set_lazy_output_requirement() { + set_read_requirement_self(ReadRequirement::LAZY_OUTPUT); for (auto& sub_iterator : _sub_column_iterators) { - sub_iterator->set_need_to_read(); + sub_iterator->set_lazy_output_requirement(); } } void StructFileColumnIterator::remove_pruned_sub_iterators() { for (auto it = _sub_column_iterators.begin(); it != _sub_column_iterators.end();) { auto& sub_iterator = *it; - if (sub_iterator->reading_flag() == ReadingFlag::SKIP_READING) { + if (sub_iterator->read_requirement() == ReadRequirement::SKIP) { DLOG(INFO) << "Struct column iterator remove pruned sub-column " << sub_iterator->column_name(); it = _sub_column_iterators.erase(it); @@ -1565,27 +1753,45 @@ void 
StructFileColumnIterator::remove_pruned_sub_iterators() { Status StructFileColumnIterator::set_access_paths( const TColumnAccessPaths& all_access_paths, const TColumnAccessPaths& predicate_access_paths) { - if (all_access_paths.empty()) { + if (all_access_paths.empty() && predicate_access_paths.empty()) { return Status::OK(); } + const auto requirement_before_access_path = _read_requirement; if (!predicate_access_paths.empty()) { - set_reading_flag(ReadingFlag::READING_FOR_PREDICATE); - DLOG(INFO) << "Struct column iterator set sub-column " << _column_name - << " to READING_FOR_PREDICATE"; + set_read_requirement_self(ReadRequirement::PREDICATE); + DLOG(INFO) << "Struct column iterator set sub-column " << _column_name << " to PREDICATE"; } - auto sub_all_access_paths = DORIS_TRY(_get_sub_access_paths(all_access_paths)); - auto sub_predicate_access_paths = DORIS_TRY(_get_sub_access_paths(predicate_access_paths)); - // Check for NULL_MAP_ONLY mode: only read null map, skip all sub-columns - _check_and_set_meta_read_mode(sub_all_access_paths); - if (read_null_map_only()) { - for (auto& sub_iterator : _sub_column_iterators) { - sub_iterator->set_reading_flag(ReadingFlag::SKIP_READING); + const bool has_current_level_data_path = + std::ranges::any_of(all_access_paths, [this](const TColumnAccessPath& path) { + return is_current_level_data_access_path(path, _column_name); + }); + auto sub_all_access_paths = DORIS_TRY(_get_sub_access_paths(all_access_paths)); + auto sub_predicate_access_paths = + DORIS_TRY(_get_sub_access_paths(predicate_access_paths, true)); + if (has_current_level_data_path) { + remove_current_level_meta_access_paths(sub_all_access_paths); + } + const bool has_current_level_predicate_meta_path = + std::ranges::any_of(sub_predicate_access_paths, is_current_level_meta_access_path); + // Current-level predicate metadata paths are consumed by this struct iterator and must not be + // forwarded to child fields. 
The FE keeps all_access_paths as a superset of predicate paths, so + // NULL_MAP_ONLY is still decided from sub_all_access_paths below. + remove_current_level_meta_access_paths(sub_predicate_access_paths); + + if (sub_predicate_access_paths.empty()) { + // Check for NULL_MAP_ONLY mode: only read null map, skip all sub-columns. + // Do not take this early return when predicate child paths must still be read. + _check_and_set_meta_read_mode(requirement_before_access_path, sub_all_access_paths); + if (read_null_map_only()) { + for (auto& sub_iterator : _sub_column_iterators) { + sub_iterator->set_read_requirement(ReadRequirement::SKIP); + } + DLOG(INFO) << "Struct column iterator set column " << _column_name + << " to NULL_MAP_ONLY meta read mode, all sub-columns set to SKIP"; + return Status::OK(); } - DLOG(INFO) << "Struct column iterator set column " << _column_name - << " to NULL_MAP_ONLY reading mode, all sub-columns set to SKIP_READING"; - return Status::OK(); } const auto no_sub_column_to_skip = sub_all_access_paths.empty(); @@ -1593,27 +1799,16 @@ Status StructFileColumnIterator::set_access_paths( for (auto& sub_iterator : _sub_column_iterators) { const auto name = sub_iterator->column_name(); - bool need_to_read = no_sub_column_to_skip; TColumnAccessPaths sub_all_access_paths_of_this; - if (!need_to_read) { + if (!no_sub_column_to_skip) { for (const auto& paths : sub_all_access_paths) { if (paths.data_access_path.path[0] == name) { sub_all_access_paths_of_this.emplace_back(paths); } } - need_to_read = !sub_all_access_paths_of_this.empty(); } - if (!need_to_read) { - sub_iterator->set_reading_flag(ReadingFlag::SKIP_READING); - DLOG(INFO) << "Struct column iterator set sub-column " << name << " to SKIP_READING"; - continue; - } - set_reading_flag(ReadingFlag::NEED_TO_READ); - sub_iterator->set_reading_flag(ReadingFlag::NEED_TO_READ); - TColumnAccessPaths sub_predicate_access_paths_of_this; - if (!no_predicate_sub_column) { for (const auto& paths : 
sub_predicate_access_paths) { if (StringCaseEqual()(paths.data_access_path.path[0], name)) { @@ -1622,12 +1817,73 @@ Status StructFileColumnIterator::set_access_paths( } } + // Predicate-only child paths still need to be routed to the child iterator + // even when the child is not requested by ordinary projection access paths. + const bool need_to_read = no_sub_column_to_skip || !sub_all_access_paths_of_this.empty() || + !sub_predicate_access_paths_of_this.empty(); + if (!need_to_read) { + set_read_requirement_self(ReadRequirement::SKIP); + sub_iterator->set_read_requirement(ReadRequirement::SKIP); + DLOG(INFO) << "Struct column iterator set sub-column " << name << " to SKIP"; + continue; + } + + if (no_predicate_sub_column && _read_requirement == ReadRequirement::PREDICATE && + !has_current_level_predicate_meta_path) { + // if no sub-column in predicate_access_paths, but current column is PREDICATE, + // then we should set sub iterator to PREDICATE too. + sub_iterator->set_read_requirement(ReadRequirement::PREDICATE); + } + RETURN_IF_ERROR(sub_iterator->set_access_paths(sub_all_access_paths_of_this, sub_predicate_access_paths_of_this)); + // Set LAZY_OUTPUT after routing child predicate paths. If the child was needed only for + // predicate evaluation, set_access_paths() has already promoted it to + // PREDICATE and this monotonic update will not downgrade it. Otherwise, this + // marks the child as a lazy materialization target. + set_read_requirement_self(ReadRequirement::LAZY_OUTPUT); + sub_iterator->set_read_requirement_self(ReadRequirement::LAZY_OUTPUT); } return Status::OK(); } +void StructFileColumnIterator::set_read_phase(ReadPhase mode) { + ColumnIterator::set_read_phase(mode); + for (auto& sub_iterator : _sub_column_iterators) { + sub_iterator->set_read_phase(mode); + } +} + +void StructFileColumnIterator::finalize_lazy_phase(MutableColumnPtr& dst) { + _recovery_from_place_holder_column(dst); + auto& column_struct = assert_cast( + dst->is_nullable() ? 
static_cast(*dst).get_nested_column() : *dst); + + for (size_t i = 0; i < _sub_column_iterators.size(); ++i) { + auto& sub_column = column_struct.get_column_ptr(i); + MutableColumnPtr mutable_sub_column = IColumn::mutate(std::move(sub_column)); + _sub_column_iterators[i]->finalize_lazy_phase(mutable_sub_column); + sub_column = std::move(mutable_sub_column); + } +} + +void StructFileColumnIterator::set_read_requirement(ReadRequirement requirement) { + set_read_requirement_self(requirement); + for (const auto& sub_column_iterator : _sub_column_iterators) { + sub_column_iterator->set_read_requirement(requirement); + } +} + +bool StructFileColumnIterator::has_lazy_read_target() const { + if (_read_requirement == ReadRequirement::LAZY_OUTPUT) { + return true; + } + return std::any_of(_sub_column_iterators.begin(), _sub_column_iterators.end(), + [](const auto& sub_column_iterator) { + return sub_column_iterator->has_lazy_read_target(); + }); +} + //////////////////////////////////////////////////////////////////////////////// Status OffsetFileColumnIterator::init(const ColumnIteratorOptions& opts) { RETURN_IF_ERROR(_offset_iterator->init(opts)); @@ -1712,8 +1968,8 @@ ArrayFileColumnIterator::ArrayFileColumnIterator(std::shared_ptr r } Status ArrayFileColumnIterator::init(const ColumnIteratorOptions& opts) { - if (_reading_flag == ReadingFlag::SKIP_READING) { - DLOG(INFO) << "Array column iterator column " << _column_name << " skip readking."; + if (_read_requirement == ReadRequirement::SKIP) { + DLOG(INFO) << "Array column iterator column " << _column_name << " skip reading."; return Status::OK(); } @@ -1727,7 +1983,7 @@ Status ArrayFileColumnIterator::init(const ColumnIteratorOptions& opts) { Status ArrayFileColumnIterator::_seek_by_offsets(ordinal_t ord) { if (read_offset_only()) { - // In OFFSET_ONLY mode, item iterator is SKIP_READING, no need to seek it + // In OFFSET_ONLY mode, item iterator is SKIP, no need to seek it return Status::OK(); } // using offsets info 
@@ -1738,7 +1994,7 @@ Status ArrayFileColumnIterator::_seek_by_offsets(ordinal_t ord) { } Status ArrayFileColumnIterator::seek_to_ordinal(ordinal_t ord) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (!need_to_read()) { DLOG(INFO) << "Array column iterator column " << _column_name << " skip reading."; return Status::OK(); } @@ -1759,12 +2015,16 @@ Status ArrayFileColumnIterator::seek_to_ordinal(ordinal_t ord) { } Status ArrayFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool* has_null) { - if (_reading_flag == ReadingFlag::SKIP_READING) { - DLOG(INFO) << "Array column iterator column " << _column_name << " skip reading."; - dst->insert_many_defaults(*n); + if (!need_to_read()) { + DLOG(INFO) << "Array column iterator column " << _column_name << " skip reading, read phase" + << static_cast(_read_phase) + << ", read requirement: " << static_cast(_read_requirement); + _convert_to_place_holder_column(dst, *n); return Status::OK(); } + _recovery_from_place_holder_column(dst); + if (read_null_map_only()) { // NULL_MAP_ONLY mode: read null map, fill nested ColumnArray with empty defaults DORIS_CHECK(is_column_nullable(*dst)); @@ -1794,13 +2054,25 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, boo : *dst); bool offsets_has_null = false; - auto column_offsets_ptr = std::move(*column_array.get_offsets_ptr()).mutate(); + const bool read_meta_columns = need_to_read_meta_columns(); + MutableColumnPtr column_offsets_ptr; + if (read_meta_columns) { + column_offsets_ptr = IColumn::mutate(std::move(column_array.get_offsets_ptr())); + } else { + const auto base_offset = + column_array.get_offsets().empty() ? 
0 : column_array.get_offsets().back(); + column_offsets_ptr = ColumnArray::ColumnOffsets::create(); + assert_cast(*column_offsets_ptr) + .insert_value(base_offset); + } Defer defer_offsets {[&] { - auto typed_column_offsets_ptr = ColumnArray::ColumnOffsets::cast_to_column_mutptr( - assert_cast( - column_offsets_ptr.get())); - column_offsets_ptr = nullptr; - column_array.get_offsets_ptr() = std::move(typed_column_offsets_ptr); + if (read_meta_columns) { + auto typed_column_offsets_ptr = ColumnArray::ColumnOffsets::cast_to_column_mutptr( + assert_cast( + column_offsets_ptr.get())); + column_offsets_ptr = nullptr; + column_array.get_offsets_ptr() = std::move(typed_column_offsets_ptr); + } }}; ssize_t start = column_offsets_ptr->size(); RETURN_IF_ERROR(_offset_iterator->next_batch(n, column_offsets_ptr, &offsets_has_null)); @@ -1826,7 +2098,7 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, boo } } - if (is_column_nullable(*dst)) { + if (is_column_nullable(*dst) && read_meta_columns) { auto null_map_ptr = static_cast(*dst).get_null_map_column_ptr(); size_t num_read = *n; // in not-null to null linked-schemachange mode, @@ -1859,23 +2131,35 @@ Status ArrayFileColumnIterator::init_prefetcher(const SegmentPrefetchParams& par void ArrayFileColumnIterator::collect_prefetchers( std::map>& prefetchers, PrefetcherInitMethod init_method) { - _offset_iterator->collect_prefetchers(prefetchers, init_method); - // the actual data pages to read of item column depends on the read result of offset column, - // so we can't init prefetch blocks according to rowids, just prefetch all data blocks here. 
- _item_iterator->collect_prefetchers(prefetchers, PrefetcherInitMethod::ALL_DATA_BLOCKS); + if (!need_to_read()) { + return; + } + if (!read_null_map_only()) { + _offset_iterator->collect_prefetchers(prefetchers, init_method); + } if (_array_reader->is_nullable()) { _null_iterator->collect_prefetchers(prefetchers, init_method); } + if (read_offset_only() || read_null_map_only()) { + return; + } + // the actual data pages to read of item column depends on the read result of offset column, + // so we can't init prefetch blocks according to rowids, just prefetch all data blocks here. + if (_item_iterator->need_to_read()) { + _item_iterator->collect_prefetchers(prefetchers, PrefetcherInitMethod::ALL_DATA_BLOCKS); + } } Status ArrayFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count, MutableColumnPtr& dst) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (!need_to_read()) { DLOG(INFO) << "Array column iterator column " << _column_name << " skip reading."; - dst->insert_many_defaults(count); + _convert_to_place_holder_column(dst, count); return Status::OK(); } + _recovery_from_place_holder_column(dst); + for (size_t i = 0; i < count; ++i) { // TODO(cambyszju): now read array one by one, need optimize later RETURN_IF_ERROR(seek_to_ordinal(rowids[i])); @@ -1885,44 +2169,93 @@ Status ArrayFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size return Status::OK(); } -void ArrayFileColumnIterator::set_need_to_read() { - set_reading_flag(ReadingFlag::NEED_TO_READ); - _item_iterator->set_need_to_read(); +void ArrayFileColumnIterator::set_lazy_output_requirement() { + set_read_requirement_self(ReadRequirement::LAZY_OUTPUT); + _item_iterator->set_lazy_output_requirement(); } void ArrayFileColumnIterator::remove_pruned_sub_iterators() { _item_iterator->remove_pruned_sub_iterators(); } +void ArrayFileColumnIterator::set_read_phase(ReadPhase mode) { + ColumnIterator::set_read_phase(mode); + _item_iterator->set_read_phase(mode); +} + 
+void ArrayFileColumnIterator::finalize_lazy_phase(MutableColumnPtr& dst) { + _recovery_from_place_holder_column(dst); + auto& column_array = assert_cast( + dst->is_nullable() ? static_cast(*dst).get_nested_column() : *dst); + auto item_column_ptr = IColumn::mutate(std::move(column_array.get_data_ptr())); + _item_iterator->finalize_lazy_phase(item_column_ptr); + column_array.get_data_ptr() = std::move(item_column_ptr); +} + +void ArrayFileColumnIterator::set_read_requirement(ReadRequirement requirement) { + set_read_requirement_self(requirement); + _item_iterator->set_read_requirement(requirement); +} + +bool ArrayFileColumnIterator::has_lazy_read_target() const { + return _read_requirement == ReadRequirement::LAZY_OUTPUT || + _item_iterator->has_lazy_read_target(); +} + Status ArrayFileColumnIterator::set_access_paths(const TColumnAccessPaths& all_access_paths, const TColumnAccessPaths& predicate_access_paths) { - if (all_access_paths.empty()) { + if (all_access_paths.empty() && predicate_access_paths.empty()) { return Status::OK(); } + const auto requirement_before_access_path = _read_requirement; if (!predicate_access_paths.empty()) { - set_reading_flag(ReadingFlag::READING_FOR_PREDICATE); - DLOG(INFO) << "Array column iterator set sub-column " << _column_name - << " to READING_FOR_PREDICATE"; + set_read_requirement_self(ReadRequirement::PREDICATE); + DLOG(INFO) << "Array column iterator set sub-column " << _column_name << " to PREDICATE"; } + const bool has_current_level_data_path = + std::ranges::any_of(all_access_paths, [this](const TColumnAccessPath& path) { + return is_current_level_data_access_path(path, _column_name); + }); auto sub_all_access_paths = DORIS_TRY(_get_sub_access_paths(all_access_paths)); - auto sub_predicate_access_paths = DORIS_TRY(_get_sub_access_paths(predicate_access_paths)); - - // Check for meta-only modes (OFFSET_ONLY or NULL_MAP_ONLY) - _check_and_set_meta_read_mode(sub_all_access_paths); - if (read_offset_only()) { - 
_item_iterator->set_reading_flag(ReadingFlag::SKIP_READING); - DLOG(INFO) << "Array column iterator set column " << _column_name - << " to OFFSET_ONLY reading mode, item column set to SKIP_READING"; - return Status::OK(); - } - if (read_null_map_only()) { - _item_iterator->set_reading_flag(ReadingFlag::SKIP_READING); - DLOG(INFO) << "Array column iterator set column " << _column_name - << " to NULL_MAP_ONLY reading mode, item column set to SKIP_READING"; - return Status::OK(); + auto sub_predicate_access_paths = + DORIS_TRY(_get_sub_access_paths(predicate_access_paths, true)); + if (has_current_level_data_path) { + // A current-level data path already reads the array offsets while materializing the array. + // Do not let a redundant current-level OFFSET/NULL path switch this iterator into a + // meta-only mode that would skip item data. + remove_current_level_meta_access_paths(sub_all_access_paths); + } + const bool has_current_level_predicate_meta_path = + std::ranges::any_of(sub_predicate_access_paths, is_current_level_meta_access_path); + // Current-level predicate metadata paths are consumed by this array iterator and must not be + // forwarded to the item iterator. The FE keeps all_access_paths as a superset of predicate + // paths, so meta-only mode is still decided from sub_all_access_paths below. + auto removed = + std::ranges::remove_if(sub_predicate_access_paths, is_current_level_meta_access_path); + sub_predicate_access_paths.erase(removed.begin(), removed.end()); + + if (sub_predicate_access_paths.empty()) { + // Check for meta-only modes (OFFSET_ONLY or NULL_MAP_ONLY). Only skip the item + // iterator when no predicate sub-path needs it in the predicate phase. 
+ _check_and_set_meta_read_mode(requirement_before_access_path, sub_all_access_paths); + if (read_offset_only()) { + _item_iterator->set_read_requirement(ReadRequirement::SKIP); + DLOG(INFO) << "Array column iterator set column " << _column_name + << " to OFFSET_ONLY meta read mode, item column set to SKIP"; + return Status::OK(); + } + if (read_null_map_only()) { + _item_iterator->set_read_requirement(ReadRequirement::SKIP); + DLOG(INFO) << "Array column iterator set column " << _column_name + << " to NULL_MAP_ONLY meta read mode, item column set to SKIP"; + return Status::OK(); + } } + // OFFSET/NULL at the current array level is consumed by this iterator. After deciding that + // the array is not in a meta-only mode, do not forward those paths to the item iterator. + remove_current_level_meta_access_paths(sub_all_access_paths); const auto no_sub_column_to_skip = sub_all_access_paths.empty(); const auto no_predicate_sub_column = sub_predicate_access_paths.empty(); @@ -1935,7 +2268,17 @@ Status ArrayFileColumnIterator::set_access_paths(const TColumnAccessPaths& all_a } } - if (!no_predicate_sub_column) { + if (no_predicate_sub_column) { + // Current-level predicate meta paths (OFFSET/NULL) are consumed by the array itself and + // removed before forwarding paths to the item iterator. If they are the only predicate + // paths, the item iterator may still be needed later for lazy materialization, but it must + // not be promoted to PREDICATE. Only propagate the predicate requirement when the + // parent predicate really applies to the item/whole value instead of array metadata only. 
+ if (_read_requirement == ReadRequirement::PREDICATE && + !has_current_level_predicate_meta_path) { + _item_iterator->set_read_requirement(ReadRequirement::PREDICATE); + } + } else { for (auto& path : sub_predicate_access_paths) { if (path.data_access_path.path[0] == ACCESS_ALL) { path.data_access_path.path[0] = _item_iterator->column_name(); @@ -1944,9 +2287,12 @@ Status ArrayFileColumnIterator::set_access_paths(const TColumnAccessPaths& all_a } if (!no_sub_column_to_skip || !no_predicate_sub_column) { - _item_iterator->set_reading_flag(ReadingFlag::NEED_TO_READ); RETURN_IF_ERROR( _item_iterator->set_access_paths(sub_all_access_paths, sub_predicate_access_paths)); + // Predicate-only item paths stay PREDICATE because this update runs after + // child set_access_paths() and read requirements are monotonic. Non-predicate item paths are + // marked as lazy materialization targets. + _item_iterator->set_read_requirement_self(ReadRequirement::LAZY_OUTPUT); } return Status::OK(); } @@ -1971,18 +2317,26 @@ Status StringFileColumnIterator::init(const ColumnIteratorOptions& opts) { Status StringFileColumnIterator::set_access_paths( const TColumnAccessPaths& all_access_paths, const TColumnAccessPaths& predicate_access_paths) { - if (all_access_paths.empty()) { + if (all_access_paths.empty() && predicate_access_paths.empty()) { return Status::OK(); } + const auto requirement_before_access_path = _read_requirement; if (!predicate_access_paths.empty()) { - set_reading_flag(ReadingFlag::READING_FOR_PREDICATE); + set_read_requirement(ReadRequirement::PREDICATE); } + const bool has_current_level_data_path = + std::ranges::any_of(all_access_paths, [this](const TColumnAccessPath& path) { + return is_current_level_data_access_path(path, _column_name); + }); // Strip the column name from path[0] before checking for meta-only modes. // Raw paths look like ["col_name", "OFFSET"] or ["col_name", "NULL"]. 
auto sub_all_access_paths = DORIS_TRY(_get_sub_access_paths(all_access_paths)); - _check_and_set_meta_read_mode(sub_all_access_paths); + if (has_current_level_data_path) { + remove_current_level_meta_access_paths(sub_all_access_paths); + } + _check_and_set_meta_read_mode(requirement_before_access_path, sub_all_access_paths); // OFFSET_ONLY mode is fundamentally incompatible with CHAR columns: // CHAR is stored padded to its declared length (see // OlapColumnDataConvertorChar::clone_and_padding), so the per-row length @@ -2007,10 +2361,10 @@ Status StringFileColumnIterator::set_access_paths( } if (read_offset_only()) { DLOG(INFO) << "String column iterator set column " << _column_name - << " to OFFSET_ONLY reading mode"; + << " to OFFSET_ONLY meta read mode"; } else if (read_null_map_only()) { DLOG(INFO) << "String column iterator set column " << _column_name - << " to NULL_MAP_ONLY reading mode"; + << " to NULL_MAP_ONLY meta read mode"; } return Status::OK(); @@ -2020,23 +2374,45 @@ Status StringFileColumnIterator::set_access_paths( FileColumnIterator::FileColumnIterator(std::shared_ptr reader) : _reader(reader) {} -void ColumnIterator::_check_and_set_meta_read_mode(const TColumnAccessPaths& sub_all_access_paths) { +void ColumnIterator::_check_and_set_meta_read_mode(ReadRequirement requirement_before_access_path, + const TColumnAccessPaths& sub_all_access_paths) { + _meta_read_mode = MetaReadMode::DEFAULT; + if (requirement_before_access_path != ReadRequirement::NORMAL && + requirement_before_access_path != ReadRequirement::SKIP) { + // A stronger requirement means a parent/full-data path already required this iterator + // to materialize data. In that case a later predicate NULL/OFFSET path is only + // an additional predicate requirement and must not downgrade the read to + // meta-only. 
+ return; + } + + bool has_offset_path = false; + bool has_null_path = false; for (const auto& path : sub_all_access_paths) { - if (!path.data_access_path.path.empty()) { - if (StringCaseEqual()(path.data_access_path.path[0], ACCESS_OFFSET)) { - _read_mode = ReadMode::OFFSET_ONLY; - return; - } else if (StringCaseEqual()(path.data_access_path.path[0], ACCESS_NULL)) { - _read_mode = ReadMode::NULL_MAP_ONLY; - return; - } + if (!is_current_level_meta_access_path(path)) { + _meta_read_mode = MetaReadMode::DEFAULT; + return; + } + const auto& component = path.data_access_path.path[0]; + if (StringCaseEqual()(component, ACCESS_OFFSET)) { + has_offset_path = true; + } else { + has_null_path = true; } } - _read_mode = ReadMode::DEFAULT; + if (has_offset_path) { + // OFFSET_ONLY skips actual child/string data, but nullable complex iterators still + // materialize the current-level null map. So OFFSET covers OFFSET+NULL metadata. + _meta_read_mode = MetaReadMode::OFFSET_ONLY; + } else if (has_null_path) { + _meta_read_mode = MetaReadMode::NULL_MAP_ONLY; + } else { + _meta_read_mode = MetaReadMode::DEFAULT; + } } Status FileColumnIterator::init(const ColumnIteratorOptions& opts) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (_read_requirement == ReadRequirement::SKIP) { DLOG(INFO) << "File column iterator column " << _column_name << " skip reading."; return Status::OK(); } @@ -2081,7 +2457,7 @@ void FileColumnIterator::_trigger_prefetch_if_eligible(ordinal_t ord) { } Status FileColumnIterator::seek_to_ordinal(ordinal_t ord) { - if (_reading_flag == ReadingFlag::SKIP_READING) { + if (!need_to_read()) { DLOG(INFO) << "File column iterator column " << _column_name << " skip reading."; return Status::OK(); } @@ -2142,6 +2518,14 @@ Status FileColumnIterator::next_batch_of_zone_map(size_t* n, MutableColumnPtr& d } Status FileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool* has_null) { + if (!need_to_read()) { + DLOG(INFO) << "File column iterator 
column " << _column_name << " skip reading."; + _convert_to_place_holder_column(dst, *n); + return Status::OK(); + } + + _recovery_from_place_holder_column(dst); + if (read_null_map_only()) { DLOG(INFO) << "File column iterator column " << _column_name << " in NULL_MAP_ONLY mode, reading only null map."; @@ -2190,12 +2574,6 @@ Status FileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool* ha return Status::OK(); } - if (_reading_flag == ReadingFlag::SKIP_READING) { - DLOG(INFO) << "File column iterator column " << _column_name << " skip reading."; - dst->insert_many_defaults(*n); - return Status::OK(); - } - size_t curr_size = dst->byte_size(); dst->reserve(*n); size_t remaining = *n; @@ -2255,6 +2633,14 @@ Status FileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool* ha Status FileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count, MutableColumnPtr& dst) { + if (!need_to_read()) { + DLOG(INFO) << "File column iterator column " << _column_name << " skip reading."; + _convert_to_place_holder_column(dst, count); + return Status::OK(); + } + + _recovery_from_place_holder_column(dst); + if (read_null_map_only()) { DLOG(INFO) << "File column iterator column " << _column_name << " in NULL_MAP_ONLY mode, reading only null map by rowids."; @@ -2307,9 +2693,21 @@ Status FileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t co total_read_count += nrows_to_read; remaining -= nrows_to_read; } else { - memset(null_map_data.data() + base_size + total_read_count, 0, nrows_to_read); - total_read_count += nrows_to_read; - remaining -= nrows_to_read; + rowid_t current_ordinal_in_page = + cast_set(_page.offset_in_page + _page.first_ordinal); + size_t rows_in_current_page = 0; + for (size_t i = 0; i < nrows_to_read; ++i) { + if (rowids[total_read_count + i] - current_ordinal_in_page >= nrows_to_read) { + break; + } + ++rows_in_current_page; + } + DCHECK_GT(rows_in_current_page, 0); + memset(null_map_data.data() + 
base_size + total_read_count, 0, + rows_in_current_page); + _page.offset_in_page += rows_in_current_page; + total_read_count += rows_in_current_page; + remaining -= rows_in_current_page; } } @@ -2318,12 +2716,6 @@ Status FileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t co return Status::OK(); } - if (_reading_flag == ReadingFlag::SKIP_READING) { - DLOG(INFO) << "File column iterator column " << _column_name << " skip reading."; - dst->insert_many_defaults(count); - return Status::OK(); - } - size_t remaining = count; size_t total_read_count = 0; size_t nrows_to_read = 0; diff --git a/be/src/storage/segment/column_reader.h b/be/src/storage/segment/column_reader.h index 64c60008dd52d2..961953a23e1218 100644 --- a/be/src/storage/segment/column_reader.h +++ b/be/src/storage/segment/column_reader.h @@ -19,11 +19,13 @@ #include #include +#include #include #include // for size_t #include // for uint32_t -#include // for unique_ptr +#include +#include // for unique_ptr #include #include #include @@ -364,7 +366,7 @@ class ColumnIterator { virtual Status set_access_paths(const TColumnAccessPaths& all_access_paths, const TColumnAccessPaths& predicate_access_paths) { if (!predicate_access_paths.empty()) { - _reading_flag = ReadingFlag::READING_FOR_PREDICATE; + set_read_requirement_self(ReadRequirement::PREDICATE); } return Status::OK(); } @@ -373,33 +375,28 @@ class ColumnIterator { const std::string& column_name() const { return _column_name; } - // Since there may be multiple paths with conflicts or overlaps, - // we need to define several reading flags: + // Per-iterator read requirement derived from nested access paths. // - // NORMAL_READING — Default value, indicating that the column should be read. - // SKIP_READING — The column should not be read. - // NEED_TO_READ — The column must be read. - // READING_FOR_PREDICATE — The column is required for predicate evaluation. 
- // - // For example, suppose there are two paths: - // - Path 1 specifies that column A needs to be read, so it is marked as NEED_TO_READ. - // - Path 2 specifies that the column should not be read, but since it is already marked as NEED_TO_READ, - // it should not be changed to SKIP_READING. - enum class ReadingFlag : int { - NORMAL_READING, - SKIP_READING, - NEED_TO_READ, - READING_FOR_PREDICATE - }; - void set_reading_flag(ReadingFlag flag) { - if (static_cast(flag) > static_cast(_reading_flag)) { - _reading_flag = flag; - } + // The ordering is intentional and used by set_read_requirement_self(): requirements are + // monotonic and a weaker requirement must not downgrade a stronger one. + // - NORMAL: no pruning decision has been made yet. + // - SKIP: this iterator should not be read. + // - LAZY_OUTPUT: materialize this iterator in the lazy phase after predicate filtering. + // - PREDICATE: read this iterator in the predicate phase. This must stay stronger than + // LAZY_OUTPUT because parents may mark children as LAZY_OUTPUT after child set_access_paths() + // has already promoted predicate-only children to PREDICATE. + enum class ReadRequirement : int { NORMAL, SKIP, LAZY_OUTPUT, PREDICATE }; + + // Set the read requirement on this iterator and all nested child iterators. 
+ virtual void set_read_requirement(ReadRequirement requirement) { + set_read_requirement_self(requirement); } - ReadingFlag reading_flag() const { return _reading_flag; } + ReadRequirement read_requirement() const { return _read_requirement; } - virtual void set_need_to_read() { set_reading_flag(ReadingFlag::NEED_TO_READ); } + virtual void set_lazy_output_requirement() { + set_read_requirement(ReadRequirement::LAZY_OUTPUT); + } virtual void remove_pruned_sub_iterators() {}; @@ -416,26 +413,96 @@ class ColumnIterator { static constexpr const char* ACCESS_NULL = "NULL"; // Meta-only read modes: - // - OFFSET_ONLY: only read offset information (e.g., for array_size/map_size/string_length) + // - OFFSET_ONLY: read offsets while skipping actual child/string data. For nullable + // complex columns, the parent null map is still materialized when needed. // - NULL_MAP_ONLY: only read null map (e.g., for IS NULL / IS NOT NULL predicates) // When these modes are enabled, actual content data is skipped. - enum class ReadMode : int { DEFAULT, OFFSET_ONLY, NULL_MAP_ONLY }; + enum class MetaReadMode : int { DEFAULT, OFFSET_ONLY, NULL_MAP_ONLY }; + + bool read_offset_only() const { return _meta_read_mode == MetaReadMode::OFFSET_ONLY; } + bool read_null_map_only() const { return _meta_read_mode == MetaReadMode::NULL_MAP_ONLY; } - bool read_offset_only() const { return _read_mode == ReadMode::OFFSET_ONLY; } - bool read_null_map_only() const { return _read_mode == ReadMode::NULL_MAP_ONLY; } + // The current scanner phase. This is intentionally separate from ReadRequirement + // (why this iterator is needed) and MetaReadMode (what physical metadata to read). 
+ enum class ReadPhase : int { + NORMAL, // default full materialization without lazy read split + PREDICATE, // predicate evaluation before row filtering + LAZY // post-filter lazy materialization + }; + + virtual void set_read_phase(ReadPhase mode) { + _read_phase = mode; + if (mode == ReadPhase::PREDICATE) { + _has_place_holder_column = false; + } + } + + virtual bool need_to_read() const { + switch (_read_phase) { + case ReadPhase::NORMAL: + return _read_requirement != ReadRequirement::SKIP; + case ReadPhase::PREDICATE: + return _read_requirement == ReadRequirement::PREDICATE; + case ReadPhase::LAZY: + return _read_requirement == ReadRequirement::LAZY_OUTPUT; + default: + return false; + } + } + + // Whether the current iterator itself should materialize meta columns, such as + // the null-map column or the offset column, into the destination column. + // + // Do not use the virtual need_to_read() here. Complex iterators override + // need_to_read() in LAZY mode to keep the parent iterator active when only a + // nested child still has data to materialize. That parent-level control-flow + // decision is different from materializing the parent's own offsets/null-map: + // if the parent was already read for predicate evaluation, LAZY mode should + // only fill the missing children and must not append parent meta again. + bool need_to_read_meta_columns() const { return ColumnIterator::need_to_read(); } + + virtual void finalize_lazy_phase(MutableColumnPtr& dst) { + _recovery_from_place_holder_column(dst); + } + + // Set only this iterator's requirement without modifying requirements of any nested child + // iterators. Use this when the parent/wrapper state must be updated while child requirements + // are decided independently. 
+ virtual void set_read_requirement_self(ReadRequirement requirement) { + if (static_cast(requirement) > static_cast(_read_requirement)) { + _read_requirement = requirement; + } + } + + // Whether this iterator or any nested iterator has data that must be materialized + // in lazy mode. Predicate-only branches are read before filtering and must not be + // re-read in the lazy phase. Meta-only access paths still become lazy targets when + // they appear only in all_access_paths, because OFFSET/NULL is the requested output. + virtual bool has_lazy_read_target() const { + return _read_requirement == ReadRequirement::LAZY_OUTPUT; + } protected: - // Checks sub access paths for OFFSET or NULL meta-only modes and - // updates _read_mode accordingly. Use the accessor helpers - // read_offset_only() / read_null_map_only() to query the current mode. - void _check_and_set_meta_read_mode(const TColumnAccessPaths& sub_all_access_paths); + void _convert_to_place_holder_column(MutableColumnPtr& dst, size_t count); + + void _recovery_from_place_holder_column(MutableColumnPtr& dst); - Result _get_sub_access_paths(const TColumnAccessPaths& access_paths); + // Derive current-level meta-only read mode from access paths. Meta-only is valid only when + // this iterator had no data-read requirement before applying the current paths, and every + // visible path at this level is NULL/OFFSET metadata. 
+ void _check_and_set_meta_read_mode(ReadRequirement requirement_before_access_path, + const TColumnAccessPaths& sub_all_access_paths); + + Result _get_sub_access_paths(TColumnAccessPaths access_paths, + bool is_predicate = false); ColumnIteratorOptions _opts; - ReadingFlag _reading_flag {ReadingFlag::NORMAL_READING}; - ReadMode _read_mode = ReadMode::DEFAULT; + ReadRequirement _read_requirement {ReadRequirement::NORMAL}; + MetaReadMode _meta_read_mode = MetaReadMode::DEFAULT; + ReadPhase _read_phase {ReadPhase::NORMAL}; std::string _column_name; + + bool _has_place_holder_column {false}; }; // This iterator is used to read column data from file @@ -584,6 +651,11 @@ class OffsetFileColumnIterator final : public ColumnIterator { return _offset_iterator->read_by_rowids(rowids, count, dst); } + void set_read_requirement(ReadRequirement requirement) override { + set_read_requirement_self(requirement); + _offset_iterator->set_read_requirement(requirement); + } + Status init_prefetcher(const SegmentPrefetchParams& params) override; void collect_prefetchers( std::map>& prefetchers, @@ -629,10 +701,33 @@ class MapFileColumnIterator final : public ColumnIterator { Status set_access_paths(const TColumnAccessPaths& all_access_paths, const TColumnAccessPaths& predicate_access_paths) override; - void set_need_to_read() override; + void set_lazy_output_requirement() override; void remove_pruned_sub_iterators() override; + void set_read_phase(ReadPhase mode) override; + + bool need_to_read() const override { + switch (_read_phase) { + case ReadPhase::NORMAL: + return _read_requirement != ReadRequirement::SKIP; + case ReadPhase::PREDICATE: + return _read_requirement == ReadRequirement::PREDICATE; + case ReadPhase::LAZY: + // In lazy mode, read this map only when at least one key/value branch still + // has non-predicate data to materialize. 
+ return has_lazy_read_target(); + default: + return false; + } + } + + void finalize_lazy_phase(MutableColumnPtr& dst) override; + + void set_read_requirement(ReadRequirement requirement) override; + + bool has_lazy_read_target() const override; + private: std::shared_ptr _map_reader = nullptr; ColumnIteratorUPtr _null_iterator; @@ -673,7 +768,7 @@ class StructFileColumnIterator final : public ColumnIterator { Status set_access_paths(const TColumnAccessPaths& all_access_paths, const TColumnAccessPaths& predicate_access_paths) override; - void set_need_to_read() override; + void set_lazy_output_requirement() override; void remove_pruned_sub_iterators() override; @@ -682,6 +777,27 @@ class StructFileColumnIterator final : public ColumnIterator { std::map>& prefetchers, PrefetcherInitMethod init_method) override; + void set_read_phase(ReadPhase mode) override; + + bool need_to_read() const override { + switch (_read_phase) { + case ReadPhase::NORMAL: + return _read_requirement != ReadRequirement::SKIP; + case ReadPhase::PREDICATE: + return _read_requirement == ReadRequirement::PREDICATE; + case ReadPhase::LAZY: + // In lazy mode, read this struct only when at least one nested branch still + // has non-predicate data to materialize. 
+ return has_lazy_read_target(); + default: + return false; + } + } + + void finalize_lazy_phase(MutableColumnPtr& dst) override; + void set_read_requirement(ReadRequirement requirement) override; + bool has_lazy_read_target() const override; + private: std::shared_ptr _struct_reader = nullptr; ColumnIteratorUPtr _null_iterator; @@ -720,7 +836,7 @@ class ArrayFileColumnIterator final : public ColumnIterator { Status set_access_paths(const TColumnAccessPaths& all_access_paths, const TColumnAccessPaths& predicate_access_paths) override; - void set_need_to_read() override; + void set_lazy_output_requirement() override; void remove_pruned_sub_iterators() override; @@ -729,6 +845,29 @@ class ArrayFileColumnIterator final : public ColumnIterator { std::map>& prefetchers, PrefetcherInitMethod init_method) override; + void set_read_phase(ReadPhase mode) override; + + bool need_to_read() const override { + switch (_read_phase) { + case ReadPhase::NORMAL: + return _read_requirement != ReadRequirement::SKIP; + case ReadPhase::PREDICATE: + return _read_requirement == ReadRequirement::PREDICATE; + case ReadPhase::LAZY: + // In lazy mode, read this array only when its item branch still has + // non-predicate data to materialize. 
+ return has_lazy_read_target(); + default: + return false; + } + } + + void finalize_lazy_phase(MutableColumnPtr& dst) override; + + void set_read_requirement(ReadRequirement requirement) override; + + bool has_lazy_read_target() const override; + private: std::shared_ptr _array_reader = nullptr; std::unique_ptr _offset_iterator; diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index e865828c52bf68..4d59c423d14d1b 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -117,6 +118,30 @@ using namespace ErrorCode; namespace segment_v2 { namespace { +class ScopedColumnIteratorReadPhase { +public: + ScopedColumnIteratorReadPhase(ColumnIterator* column_iter, ColumnIterator::ReadPhase mode) + : _column_iter(column_iter) { + DORIS_CHECK(_column_iter != nullptr); + _column_iter->set_read_phase(mode); + } + + ScopedColumnIteratorReadPhase(const ScopedColumnIteratorReadPhase&) = delete; + ScopedColumnIteratorReadPhase& operator=(const ScopedColumnIteratorReadPhase&) = delete; + + ~ScopedColumnIteratorReadPhase() { + // ReadPhase is a per-read phase knob. SegmentIterator only needs a + // temporary PREDICATE/LAZY mode while reading one column in one phase; it + // must be restored before the next column or later normal reads reuse the + // same ColumnIterator. Keep the restoration in one scoped helper instead + // of open-coding the same Defer block at every call site. 
+ _column_iter->set_read_phase(ColumnIterator::ReadPhase::NORMAL); + } + +private: + ColumnIterator* _column_iter = nullptr; +}; + Status tablet_column_id_by_slot(const TabletSchemaSPtr& tablet_schema, const SlotDescriptor* slot, ColumnId* cid) { int32_t field_index = -1; @@ -530,6 +555,10 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { _score_runtime = _opts.score_runtime; _ann_topn_runtime = _opts.ann_topn_runtime; + _enable_prune_nested_column = _opts.io_ctx.reader_type == ReaderType::READER_QUERY && + _opts.runtime_state && + _opts.runtime_state->enable_prune_nested_column(); + if (opts.output_columns != nullptr) { _output_columns = *(opts.output_columns); } @@ -767,8 +796,14 @@ void SegmentIterator::_init_segment_prefetchers() { ? PrefetcherInitMethod::FROM_ROWIDS : PrefetcherInitMethod::ALL_DATA_BLOCKS; std::map> prefetchers; - for (const auto& column_iter : _column_iterators) { + for (size_t idx = 0; idx < _column_iterators.size(); ++idx) { + auto cid = cast_set(idx); + auto* column_iter = _column_iterators[cid].get(); if (column_iter != nullptr) { + ScopedColumnIteratorReadPhase scoped_read_phase { + column_iter, _support_lazy_read_pruned_columns.contains(cid) + ? 
ColumnIterator::ReadPhase::PREDICATE + : ColumnIterator::ReadPhase::NORMAL}; column_iter->collect_prefetchers(prefetchers, init_method); } } @@ -2093,6 +2128,25 @@ Status SegmentIterator::_vec_init_lazy_materialization() { if (_is_common_expr_column[cid] || _is_pred_column[cid]) { auto loc = _schema_block_id_map[cid]; _columns_to_filter.push_back(loc); + + const auto field_type = _schema->column(cid)->type(); + if (_is_common_expr_column[cid] && _enable_prune_nested_column && + (field_type == FieldType::OLAP_FIELD_TYPE_STRUCT || + field_type == FieldType::OLAP_FIELD_TYPE_ARRAY || + field_type == FieldType::OLAP_FIELD_TYPE_MAP)) { + DCHECK(_column_iterators[cid]); + if (_column_iterators[cid]->read_requirement() == + ColumnIterator::ReadRequirement::PREDICATE && + _column_iterators[cid]->has_lazy_read_target()) { + // Only split lazy recovery for complex common expr columns that have + // both predicate-only and non-predicate nested targets. The two requirement + // checks already imply that nested-column pruning happened: without an + // explicit predicate sub-path the parent would not be + // PREDICATE, and without a pruned non-predicate child there + // would be no lazy target to recover after filtering. + _support_lazy_read_pruned_columns.emplace(cid); + } + } } } @@ -2409,16 +2463,22 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint16 }) } + auto* column_iter = _column_iterators[cid].get(); + ScopedColumnIteratorReadPhase scoped_read_phase { + column_iter, _support_lazy_read_pruned_columns.contains(cid) + ? 
ColumnIterator::ReadPhase::PREDICATE + : ColumnIterator::ReadPhase::NORMAL}; + if (is_continuous) { size_t rows_read = nrows_read; _opts.stats->predicate_column_read_seek_num += 1; if (_opts.runtime_state && _opts.runtime_state->enable_profile()) { SCOPED_RAW_TIMER(&_opts.stats->predicate_column_read_seek_ns); - RETURN_IF_ERROR(_column_iterators[cid]->seek_to_ordinal(_block_rowids[0])); + RETURN_IF_ERROR(column_iter->seek_to_ordinal(_block_rowids[0])); } else { - RETURN_IF_ERROR(_column_iterators[cid]->seek_to_ordinal(_block_rowids[0])); + RETURN_IF_ERROR(column_iter->seek_to_ordinal(_block_rowids[0])); } - RETURN_IF_ERROR(_column_iterators[cid]->next_batch(&rows_read, column)); + RETURN_IF_ERROR(column_iter->next_batch(&rows_read, column)); if (rows_read != nrows_read) { return Status::Error("nrows({}) != rows_read({})", nrows_read, rows_read); @@ -2438,20 +2498,18 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint16 _opts.stats->predicate_column_read_seek_num += 1; if (_opts.runtime_state && _opts.runtime_state->enable_profile()) { SCOPED_RAW_TIMER(&_opts.stats->predicate_column_read_seek_ns); - RETURN_IF_ERROR( - _column_iterators[cid]->seek_to_ordinal(_block_rowids[processed])); + RETURN_IF_ERROR(column_iter->seek_to_ordinal(_block_rowids[processed])); } else { - RETURN_IF_ERROR( - _column_iterators[cid]->seek_to_ordinal(_block_rowids[processed])); + RETURN_IF_ERROR(column_iter->seek_to_ordinal(_block_rowids[processed])); } - RETURN_IF_ERROR(_column_iterators[cid]->next_batch(&rows_read, column)); + RETURN_IF_ERROR(column_iter->next_batch(&rows_read, column)); if (rows_read != current_batch_size) { return Status::Error( "batch nrows({}) != rows_read({})", current_batch_size, rows_read); } } else { - RETURN_IF_ERROR(_column_iterators[cid]->read_by_rowids( - &_block_rowids[processed], current_batch_size, column)); + RETURN_IF_ERROR(column_iter->read_by_rowids(&_block_rowids[processed], + current_batch_size, column)); } processed += 
current_batch_size; } @@ -2748,7 +2806,8 @@ Status SegmentIterator::_read_columns_by_rowids(std::vector& read_colu std::vector& rowid_vector, uint16_t* sel_rowid_idx, size_t select_size, MutableColumns* mutable_columns, - bool init_condition_cache) { + bool init_condition_cache, + bool read_for_predicate) { SCOPED_RAW_TIMER(&_opts.stats->lazy_read_ns); std::vector rowids(select_size); @@ -2792,10 +2851,43 @@ Status SegmentIterator::_read_columns_by_rowids(std::vector& read_colu "SegmentIterator meet invalid column, return columns size {}, cid {}", _current_return_columns.size(), cid); } - RETURN_IF_ERROR(_column_iterators[cid]->read_by_rowids(rowids.data(), select_size, - _current_return_columns[cid])); + + auto* column_iter = _column_iterators[cid].get(); + ScopedColumnIteratorReadPhase scoped_read_phase { + column_iter, read_for_predicate && _support_lazy_read_pruned_columns.contains(cid) + ? ColumnIterator::ReadPhase::PREDICATE + : ColumnIterator::ReadPhase::NORMAL}; + + RETURN_IF_ERROR(column_iter->read_by_rowids(rowids.data(), select_size, + _current_return_columns[cid])); + } + + return Status::OK(); +} + +Status SegmentIterator::_read_lazy_pruned_columns(Block* block) { + if (_support_lazy_read_pruned_columns.empty()) { + return Status::OK(); } + SCOPED_RAW_TIMER(&_opts.stats->lazy_read_pruned_ns); + DorisVector rowids(_selected_size); + for (size_t i = 0; i < _selected_size; ++i) { + rowids[i] = _block_rowids[_sel_rowid_idx[i]]; + } + + for (auto cid : _support_lazy_read_pruned_columns) { + auto loc = _schema_block_id_map[cid]; + auto column = IColumn::mutate(std::move(block->get_by_position(loc).column)); + auto* column_iter = _column_iterators[cid].get(); + ScopedColumnIteratorReadPhase scoped_read_phase {column_iter, + ColumnIterator::ReadPhase::LAZY}; + if (_selected_size > 0) { + RETURN_IF_ERROR(column_iter->read_by_rowids(rowids.data(), _selected_size, column)); + } + column_iter->finalize_lazy_phase(column); + block->get_by_position(loc).column = 
std::move(column); + } return Status::OK(); } @@ -3013,7 +3105,7 @@ Status SegmentIterator::_next_batch_internal(Block* block) { SCOPED_RAW_TIMER(&_opts.stats->non_predicate_read_ns); RETURN_IF_ERROR(_read_columns_by_rowids( _common_expr_column_ids, _block_rowids, _sel_rowid_idx.data(), - _selected_size, &_current_return_columns)); + _selected_size, &_current_return_columns, false, true)); _replace_version_col_if_needed(_common_expr_column_ids, _selected_size); _update_lsn_col_if_needed(_common_expr_column_ids, _selected_size); _update_tso_col_if_needed(_common_expr_column_ids, _selected_size); @@ -3048,7 +3140,7 @@ Status SegmentIterator::_next_batch_internal(Block* block) { RETURN_IF_ERROR(_read_columns_by_rowids( _non_predicate_columns, _block_rowids, _sel_rowid_idx.data(), _selected_size, &_current_return_columns, - _opts.condition_cache_digest && !_find_condition_cache)); + _opts.condition_cache_digest && !_find_condition_cache, false)); _replace_version_col_if_needed(_non_predicate_columns, _selected_size); _update_lsn_col_if_needed(_non_predicate_columns, _selected_size); _update_tso_col_if_needed(_non_predicate_columns, _selected_size); @@ -3062,6 +3154,8 @@ Status SegmentIterator::_next_batch_internal(Block* block) { } } } + + RETURN_IF_ERROR(_read_lazy_pruned_columns(block)); } // step5: output columns diff --git a/be/src/storage/segment/segment_iterator.h b/be/src/storage/segment/segment_iterator.h index 8fd143867ed97b..2e1d8c90070074 100644 --- a/be/src/storage/segment/segment_iterator.h +++ b/be/src/storage/segment/segment_iterator.h @@ -228,7 +228,9 @@ class SegmentIterator : public RowwiseIterator { std::vector& rowid_vector, uint16_t* sel_rowid_idx, size_t select_size, MutableColumns* mutable_columns, - bool init_condition_cache = false); + bool init_condition_cache = false, + bool read_for_predicate = false); + [[nodiscard]] Status _read_lazy_pruned_columns(Block* block); Status copy_column_data_by_selector(IColumn* input_col_ptr, MutableColumnPtr& 
output_col, uint16_t* sel_rowid_idx, uint16_t select_size, @@ -373,6 +375,9 @@ class SegmentIterator : public RowwiseIterator { bool _is_need_short_eval = false; bool _is_need_expr_eval = false; + std::set _support_lazy_read_pruned_columns; + bool _enable_prune_nested_column = false; + // fields for vectorization execution std::vector _vec_pred_column_ids; // keep columnId of columns for vectorized predicate evaluation diff --git a/be/src/storage/segment/variant/variant_column_reader.cpp b/be/src/storage/segment/variant/variant_column_reader.cpp index df9561ada7cd7f..9b71a643835985 100644 --- a/be/src/storage/segment/variant/variant_column_reader.cpp +++ b/be/src/storage/segment/variant/variant_column_reader.cpp @@ -85,7 +85,7 @@ class ReaderOwnedColumnIterator final : public ColumnIterator { : _inner(std::move(inner)), _owner(std::move(owner)) { DCHECK(_inner != nullptr); set_column_name(_inner->column_name()); - set_reading_flag(_inner->reading_flag()); + set_read_requirement(_inner->read_requirement()); } Status init(const ColumnIteratorOptions& opts) override { return _inner->init(opts); } @@ -129,15 +129,36 @@ class ReaderOwnedColumnIterator final : public ColumnIterator { Status set_access_paths(const TColumnAccessPaths& all_access_paths, const TColumnAccessPaths& predicate_access_paths) override { RETURN_IF_ERROR(_inner->set_access_paths(all_access_paths, predicate_access_paths)); - set_reading_flag(_inner->reading_flag()); + ColumnIterator::set_read_requirement_self(_inner->read_requirement()); return Status::OK(); } - void set_need_to_read() override { - _inner->set_need_to_read(); - set_reading_flag(_inner->reading_flag()); + void set_read_requirement(ReadRequirement requirement) override { + _inner->set_read_requirement(requirement); + ColumnIterator::set_read_requirement_self(_inner->read_requirement()); } + void set_read_requirement_self(ReadRequirement requirement) override { + _inner->set_read_requirement_self(requirement); + 
ColumnIterator::set_read_requirement_self(_inner->read_requirement()); + } + + void set_lazy_output_requirement() override { + _inner->set_lazy_output_requirement(); + ColumnIterator::set_read_requirement_self(_inner->read_requirement()); + } + + void set_read_phase(ReadPhase mode) override { + ColumnIterator::set_read_phase(mode); + _inner->set_read_phase(mode); + } + + void finalize_lazy_phase(MutableColumnPtr& dst) override { _inner->finalize_lazy_phase(dst); } + + bool has_lazy_read_target() const override { return _inner->has_lazy_read_target(); } + + bool need_to_read() const override { return _inner->need_to_read(); } + void remove_pruned_sub_iterators() override { _inner->remove_pruned_sub_iterators(); } Status init_prefetcher(const SegmentPrefetchParams& params) override { diff --git a/be/test/storage/segment/column_reader_test.cpp b/be/test/storage/segment/column_reader_test.cpp index 54bf94a2061ae0..3ee511a5980632 100644 --- a/be/test/storage/segment/column_reader_test.cpp +++ b/be/test/storage/segment/column_reader_test.cpp @@ -23,27 +23,360 @@ #include #include +#include +#include #include +#include #include +#include #include #include "agent/be_exec_version_manager.h" #include "common/config.h" #include "io/fs/file_reader.h" +#include "io/fs/file_system.h" +#include "io/fs/file_writer.h" +#include "io/fs/local_file_system.h" +#include "storage/olap_common.h" #include "storage/segment/column_reader_cache.h" +#include "storage/segment/column_writer.h" #include "storage/segment/mock/mock_segment.h" #include "storage/segment/segment.h" #include "storage/segment/variant/variant_column_reader.h" #include "storage/tablet/tablet_schema.h" +#include "storage/types.h" #include "util/json/path_in_data.h" namespace doris::segment_v2 { +namespace { +class TestColumnIterator final : public ColumnIterator { +public: + Status seek_to_ordinal(ordinal_t /* ord */) override { return Status::OK(); } + + Status next_batch(size_t* n, MutableColumnPtr& dst, bool* has_null) 
override { + dst->insert_many_defaults(*n); + if (has_null != nullptr) { + *has_null = false; + } + return Status::OK(); + } + + Status read_by_rowids(const rowid_t* /* rowids */, const size_t count, + MutableColumnPtr& dst) override { + dst->insert_many_defaults(count); + return Status::OK(); + } + + ordinal_t get_current_ordinal() const override { return 0; } + + void force_set_read_requirement(ReadRequirement requirement) { + _read_requirement = requirement; + } + + Result get_sub_access_paths(const TColumnAccessPaths& access_paths, + bool is_predicate = false) { + return _get_sub_access_paths(access_paths, is_predicate); + } + + void check_and_set_meta_read_mode(ReadRequirement requirement_before_access_path, + const TColumnAccessPaths& access_paths) { + _check_and_set_meta_read_mode(requirement_before_access_path, access_paths); + } + + void convert_to_place_holder_column(MutableColumnPtr& dst, size_t count) { + _convert_to_place_holder_column(dst, count); + } +}; + +TColumnAccessPath create_access_path(std::vector path) { + TColumnAccessPath access_path; + TDataAccessPath data_access_path; + data_access_path.__set_path(std::move(path)); + access_path.__set_data_access_path(std::move(data_access_path)); + return access_path; +} + +std::shared_ptr create_test_reader( + bool is_nullable = false, uint64_t num_rows = 0, + FieldType field_type = FieldType::OLAP_FIELD_TYPE_INT) { + auto reader = std::make_shared(); + reader->_meta_is_nullable = is_nullable; + reader->_num_rows = num_rows; + reader->_meta_type = field_type; + return reader; +} + +class TrackingColumnIterator final : public ColumnIterator { +public: + Status seek_to_ordinal(ordinal_t ord) override { + seek_ordinals.emplace_back(ord); + _current_ordinal = ord; + return Status::OK(); + } + + Status next_batch(size_t* n, MutableColumnPtr& dst, bool* has_null) override { + next_batch_sizes.emplace_back(*n); + if (!need_to_read()) { + _convert_to_place_holder_column(dst, *n); + if (has_null != nullptr) { + 
*has_null = false; + } + return Status::OK(); + } + + _recovery_from_place_holder_column(dst); + dst->insert_many_defaults(*n); + _current_ordinal += *n; + if (has_null != nullptr) { + *has_null = false; + } + return Status::OK(); + } + + Status read_by_rowids(const rowid_t* rowids, const size_t count, + MutableColumnPtr& dst) override { + read_by_rowids_batches.emplace_back(rowids, rowids + count); + if (!need_to_read()) { + _convert_to_place_holder_column(dst, count); + return Status::OK(); + } + + _recovery_from_place_holder_column(dst); + dst->insert_many_defaults(count); + return Status::OK(); + } + + ordinal_t get_current_ordinal() const override { return _current_ordinal; } + + void collect_prefetchers( + std::map>& prefetchers, + PrefetcherInitMethod init_method) override { + record_collect_method(init_method); + prefetchers[init_method].emplace_back(prefetcher()); + } + + SegmentPrefetcher* prefetcher() const { + return reinterpret_cast(const_cast(this)); + } + + void clear_tracking() { + seek_ordinals.clear(); + next_batch_sizes.clear(); + read_by_rowids_batches.clear(); + collect_methods.clear(); + } + + std::vector seek_ordinals; + std::vector next_batch_sizes; + std::vector> read_by_rowids_batches; + std::vector collect_methods; + +private: + void record_collect_method(PrefetcherInitMethod init_method) { + collect_methods.emplace_back(init_method); + } + + ordinal_t _current_ordinal = 0; +}; + +class TrackingFileColumnIterator final : public FileColumnIterator { +public: + explicit TrackingFileColumnIterator(std::shared_ptr reader) + : FileColumnIterator(std::move(reader)) {} + + Status seek_to_ordinal(ordinal_t ord) override { + seek_ordinals.emplace_back(ord); + _current_ordinal = ord; + return Status::OK(); + } + + Status next_batch(size_t* n, MutableColumnPtr& dst, bool* has_null) override { + next_batch_sizes.emplace_back(*n); + dst->insert_many_defaults(*n); + _current_ordinal += *n; + if (has_null != nullptr) { + *has_null = false; + } + return 
Status::OK(); + } + + Status read_by_rowids(const rowid_t* rowids, const size_t count, + MutableColumnPtr& dst) override { + read_by_rowids_batches.emplace_back(rowids, rowids + count); + dst->insert_many_defaults(count); + return Status::OK(); + } + + ordinal_t get_current_ordinal() const override { return _current_ordinal; } + + void collect_prefetchers( + std::map>& prefetchers, + PrefetcherInitMethod init_method) override { + record_collect_method(init_method); + prefetchers[init_method].emplace_back(prefetcher()); + } + + SegmentPrefetcher* prefetcher() const { + return reinterpret_cast(const_cast(this)); + } + + std::vector seek_ordinals; + std::vector next_batch_sizes; + std::vector> read_by_rowids_batches; + std::vector collect_methods; + +private: + void record_collect_method(PrefetcherInitMethod init_method) { + collect_methods.emplace_back(init_method); + } + + ordinal_t _current_ordinal = 0; +}; + +class NullMapOnlyFileColumnIterator final : public FileColumnIterator { +public: + explicit NullMapOnlyFileColumnIterator(std::shared_ptr reader) + : FileColumnIterator(std::move(reader)) {} + + void force_null_map_only() { _meta_read_mode = MetaReadMode::NULL_MAP_ONLY; } +}; + +MutableColumnPtr create_int_struct_column(size_t field_count) { + Columns columns; + for (size_t i = 0; i < field_count; ++i) { + columns.emplace_back(ColumnInt32::create()); + } + return ColumnStruct::create(std::move(columns)); +} + +MutableColumnPtr create_nullable_int_struct_column(size_t field_count) { + return ColumnNullable::create(create_int_struct_column(field_count), ColumnUInt8::create()); +} + +MutableColumnPtr create_nullable_int_array_column() { + return ColumnNullable::create( + ColumnArray::create(ColumnInt32::create(), ColumnArray::ColumnOffsets::create()), + ColumnUInt8::create()); +} + +MutableColumnPtr create_nullable_int_map_column() { + return ColumnNullable::create(ColumnMap::create(ColumnInt32::create(), ColumnInt32::create(), + 
ColumnArray::ColumnOffsets::create()), + ColumnUInt8::create()); +} + +struct TrackingOffsetIterator { + OffsetFileColumnIteratorUPtr iterator; + TrackingFileColumnIterator* tracker = nullptr; +}; + +TrackingOffsetIterator create_tracking_offset_iterator() { + auto file_iterator = std::make_unique(create_test_reader()); + auto* tracker = file_iterator.get(); + return {std::make_unique(std::move(file_iterator)), tracker}; +} +} // namespace + +static const std::string COLUMN_READER_FILE_TEST_DIR = "./ut_dir/column_reader_test"; + class ColumnReaderTest : public ::testing::Test { protected: - void SetUp() override {} - void TearDown() override {} + void SetUp() override { + _old_disable_storage_page_cache = config::disable_storage_page_cache; + config::disable_storage_page_cache = true; + auto st = io::global_local_filesystem()->delete_directory(COLUMN_READER_FILE_TEST_DIR); + ASSERT_TRUE(st.ok()) << st.to_string(); + st = io::global_local_filesystem()->create_directory(COLUMN_READER_FILE_TEST_DIR); + ASSERT_TRUE(st.ok()) << st.to_string(); + } + + void TearDown() override { + EXPECT_TRUE( + io::global_local_filesystem()->delete_directory(COLUMN_READER_FILE_TEST_DIR).ok()); + config::disable_storage_page_cache = _old_disable_storage_page_cache; + } + +private: + bool _old_disable_storage_page_cache = false; }; +TEST_F(ColumnReaderTest, NullMapOnlyReadBySparseRowidsAcrossPages) { + ColumnMetaPB meta; + std::string fname = COLUMN_READER_FILE_TEST_DIR + "/null_map_only_sparse_rowids"; + auto fs = io::global_local_filesystem(); + + { + io::FileWriterPtr file_writer; + Status st = fs->create_file(fname, &file_writer); + ASSERT_TRUE(st.ok()) << st.to_string(); + + ColumnWriterOptions writer_opts; + writer_opts.meta = &meta; + writer_opts.meta->set_column_id(0); + writer_opts.meta->set_unique_id(0); + writer_opts.meta->set_type(static_cast(FieldType::OLAP_FIELD_TYPE_INT)); + writer_opts.meta->set_length(0); + writer_opts.meta->set_encoding(PLAIN_ENCODING); + 
writer_opts.meta->set_compression(segment_v2::CompressionTypePB::LZ4F); + writer_opts.meta->set_is_nullable(true); + writer_opts.data_page_size = sizeof(int32_t) * 2; + writer_opts.need_zone_map = false; + + TabletColumn column(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, + FieldType::OLAP_FIELD_TYPE_INT); + std::unique_ptr writer; + st = ColumnWriter::create(writer_opts, &column, file_writer.get(), &writer); + ASSERT_TRUE(st.ok()) << st.to_string(); + st = writer->init(); + ASSERT_TRUE(st.ok()) << st.to_string(); + + for (int32_t i = 0; i < 6; ++i) { + st = writer->append(i == 2, &i); + ASSERT_TRUE(st.ok()) << st.to_string(); + } + + st = writer->finish(); + ASSERT_TRUE(st.ok()) << st.to_string(); + st = writer->write_data(); + ASSERT_TRUE(st.ok()) << st.to_string(); + st = writer->write_ordinal_index(); + ASSERT_TRUE(st.ok()) << st.to_string(); + st = file_writer->close(); + ASSERT_TRUE(st.ok()) << st.to_string(); + } + + io::FileReaderSPtr file_reader; + auto st = fs->open_file(fname, &file_reader); + ASSERT_TRUE(st.ok()) << st.to_string(); + + ColumnReaderOptions reader_opts; + std::shared_ptr reader; + st = ColumnReader::create(reader_opts, meta, 6, file_reader, &reader); + ASSERT_TRUE(st.ok()) << st.to_string(); + + NullMapOnlyFileColumnIterator iter(reader); + ColumnIteratorOptions iter_opts; + OlapReaderStatistics stats; + iter_opts.stats = &stats; + iter_opts.file_reader = file_reader.get(); + st = iter.init(iter_opts); + ASSERT_TRUE(st.ok()) << st.to_string(); + iter.force_null_map_only(); + + MutableColumnPtr dst = ColumnNullable::create(ColumnInt32::create(), ColumnUInt8::create()); + const rowid_t rowids[] = {0, 2}; + st = iter.read_by_rowids(rowids, std::size(rowids), dst); + ASSERT_TRUE(st.ok()) << st.to_string(); + + ASSERT_EQ(2, dst->size()); + const auto& nullable_col = assert_cast(*dst); + const auto& null_map = nullable_col.get_null_map_data(); + ASSERT_EQ(2, null_map.size()); + EXPECT_EQ(0, null_map[0]); + EXPECT_EQ(1, null_map[1]); + 
EXPECT_EQ(2, nullable_col.get_nested_column().size()); +} + TEST_F(ColumnReaderTest, StructAccessPaths) { auto create_struct_iterator = []() { auto null_reader = std::make_shared(); @@ -69,7 +402,7 @@ TEST_F(ColumnReaderTest, StructAccessPaths) { auto st = iterator->set_access_paths(TColumnAccessPaths {}, TColumnAccessPaths {}); ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string(); - ASSERT_EQ(iterator->_reading_flag, ColumnIterator::ReadingFlag::NORMAL_READING); + ASSERT_EQ(iterator->_read_requirement, ColumnIterator::ReadRequirement::NORMAL); TColumnAccessPaths all_access_paths; all_access_paths.emplace_back(); @@ -82,7 +415,7 @@ TEST_F(ColumnReaderTest, StructAccessPaths) { ASSERT_FALSE(st.ok()); // Only reading sub_col_1 - // sub_col_2 should be set to SKIP_READING + // sub_col_2 should be set to SKIP all_access_paths[0].data_access_path.path = {"self", "sub_col_1"}; predicate_access_paths[0].data_access_path.path = {"self", "sub_col_1"}; @@ -95,12 +428,12 @@ TEST_F(ColumnReaderTest, StructAccessPaths) { // now column name is "self", should be ok st = iterator->set_access_paths(all_access_paths, predicate_access_paths); ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string(); - ASSERT_EQ(iterator->_reading_flag, ColumnIterator::ReadingFlag::READING_FOR_PREDICATE); + ASSERT_EQ(iterator->_read_requirement, ColumnIterator::ReadRequirement::PREDICATE); - ASSERT_EQ(iterator->_sub_column_iterators[0]->_reading_flag, - ColumnIterator::ReadingFlag::READING_FOR_PREDICATE); - ASSERT_EQ(iterator->_sub_column_iterators[1]->_reading_flag, - ColumnIterator::ReadingFlag::SKIP_READING); + ASSERT_EQ(iterator->_sub_column_iterators[0]->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + ASSERT_EQ(iterator->_sub_column_iterators[1]->_read_requirement, + ColumnIterator::ReadRequirement::SKIP); // Reading all sub columns all_access_paths[0].data_access_path.path = {"self"}; @@ -109,12 +442,1463 @@ TEST_F(ColumnReaderTest, 
StructAccessPaths) { st = iterator->set_access_paths(all_access_paths, predicate_access_paths); ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string(); - ASSERT_EQ(iterator->_reading_flag, ColumnIterator::ReadingFlag::READING_FOR_PREDICATE); + ASSERT_EQ(iterator->_read_requirement, ColumnIterator::ReadRequirement::PREDICATE); + + ASSERT_EQ(iterator->_sub_column_iterators[0]->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + ASSERT_EQ(iterator->_sub_column_iterators[1]->_read_requirement, + ColumnIterator::ReadRequirement::LAZY_OUTPUT); +} + +TEST_F(ColumnReaderTest, ReadPhaseMatrix) { + TestColumnIterator iterator; + + iterator.force_set_read_requirement(ColumnIterator::ReadRequirement::SKIP); + iterator.set_read_phase(ColumnIterator::ReadPhase::NORMAL); + EXPECT_FALSE(iterator.need_to_read()); + EXPECT_FALSE(iterator.need_to_read_meta_columns()); + + iterator.force_set_read_requirement(ColumnIterator::ReadRequirement::LAZY_OUTPUT); + EXPECT_TRUE(iterator.need_to_read()); + EXPECT_TRUE(iterator.need_to_read_meta_columns()); + + iterator.force_set_read_requirement(ColumnIterator::ReadRequirement::NORMAL); + iterator.set_read_phase(ColumnIterator::ReadPhase::PREDICATE); + EXPECT_FALSE(iterator.need_to_read()); + EXPECT_FALSE(iterator.need_to_read_meta_columns()); + + iterator.force_set_read_requirement(ColumnIterator::ReadRequirement::LAZY_OUTPUT); + EXPECT_FALSE(iterator.need_to_read()); + EXPECT_FALSE(iterator.need_to_read_meta_columns()); + + iterator.force_set_read_requirement(ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_TRUE(iterator.need_to_read()); + EXPECT_TRUE(iterator.need_to_read_meta_columns()); + + iterator.force_set_read_requirement(ColumnIterator::ReadRequirement::PREDICATE); + iterator.set_read_phase(ColumnIterator::ReadPhase::LAZY); + EXPECT_FALSE(iterator.need_to_read()); + EXPECT_FALSE(iterator.need_to_read_meta_columns()); + + iterator.force_set_read_requirement(ColumnIterator::ReadRequirement::NORMAL); 
+ EXPECT_FALSE(iterator.need_to_read()); + EXPECT_FALSE(iterator.need_to_read_meta_columns()); + + iterator.force_set_read_requirement(ColumnIterator::ReadRequirement::LAZY_OUTPUT); + EXPECT_TRUE(iterator.need_to_read()); + EXPECT_TRUE(iterator.need_to_read_meta_columns()); +} + +TEST_F(ColumnReaderTest, ReadRequirementPriorityAndLazyOutput) { + TestColumnIterator iterator; + + iterator.set_read_requirement(ColumnIterator::ReadRequirement::SKIP); + EXPECT_EQ(iterator.read_requirement(), ColumnIterator::ReadRequirement::SKIP); + + iterator.set_lazy_output_requirement(); + EXPECT_EQ(iterator.read_requirement(), ColumnIterator::ReadRequirement::LAZY_OUTPUT); + + iterator.set_read_requirement(ColumnIterator::ReadRequirement::SKIP); + EXPECT_EQ(iterator.read_requirement(), ColumnIterator::ReadRequirement::LAZY_OUTPUT); + + iterator.set_read_requirement(ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(iterator.read_requirement(), ColumnIterator::ReadRequirement::PREDICATE); + + iterator.set_read_requirement(ColumnIterator::ReadRequirement::LAZY_OUTPUT); + EXPECT_EQ(iterator.read_requirement(), ColumnIterator::ReadRequirement::PREDICATE); +} + +TEST_F(ColumnReaderTest, MetaReadModePrefersOffsetOverNull) { + auto assert_meta_read_mode = [](TColumnAccessPaths access_paths, bool offset_only, + bool null_map_only) { + TestColumnIterator iterator; + iterator.check_and_set_meta_read_mode(ColumnIterator::ReadRequirement::NORMAL, + access_paths); + EXPECT_EQ(iterator.read_offset_only(), offset_only); + EXPECT_EQ(iterator.read_null_map_only(), null_map_only); + }; + + assert_meta_read_mode(TColumnAccessPaths {}, false, false); + assert_meta_read_mode(TColumnAccessPaths {create_access_path({ColumnIterator::ACCESS_OFFSET})}, + true, false); + assert_meta_read_mode(TColumnAccessPaths {create_access_path({ColumnIterator::ACCESS_NULL})}, + false, true); + assert_meta_read_mode(TColumnAccessPaths {create_access_path({ColumnIterator::ACCESS_OFFSET}), + 
create_access_path({ColumnIterator::ACCESS_NULL})}, + true, false); + assert_meta_read_mode(TColumnAccessPaths {create_access_path({"child"})}, false, false); + assert_meta_read_mode(TColumnAccessPaths {create_access_path({ColumnIterator::ACCESS_OFFSET}), + create_access_path({"child"})}, + false, false); + + { + TestColumnIterator iterator; + iterator.check_and_set_meta_read_mode( + ColumnIterator::ReadRequirement::LAZY_OUTPUT, + TColumnAccessPaths {create_access_path({ColumnIterator::ACCESS_NULL})}); + EXPECT_FALSE(iterator.read_null_map_only()); + EXPECT_FALSE(iterator.read_offset_only()); + } +} + +TEST_F(ColumnReaderTest, PlaceHolderLifecycleInLazyMode) { + TestColumnIterator iterator; + iterator.force_set_read_requirement(ColumnIterator::ReadRequirement::LAZY_OUTPUT); + iterator.set_read_phase(ColumnIterator::ReadPhase::PREDICATE); + + MutableColumnPtr dst = ColumnInt32::create(); + iterator.convert_to_place_holder_column(dst, 3); + + EXPECT_EQ(3, dst->size()); + EXPECT_TRUE(iterator._has_place_holder_column); + + iterator.set_read_phase(ColumnIterator::ReadPhase::LAZY); + iterator.finalize_lazy_phase(dst); + EXPECT_EQ(0, dst->size()); + EXPECT_FALSE(iterator._has_place_holder_column); + + MutableColumnPtr lazy_dst = ColumnInt32::create(); + iterator.force_set_read_requirement(ColumnIterator::ReadRequirement::LAZY_OUTPUT); + iterator.set_read_phase(ColumnIterator::ReadPhase::LAZY); + iterator.convert_to_place_holder_column(lazy_dst, 4); + EXPECT_EQ(0, lazy_dst->size()); +} + +TEST_F(ColumnReaderTest, PlaceHolderRecoveryAfterColumnReplacement) { + TestColumnIterator iterator; + iterator.force_set_read_requirement(ColumnIterator::ReadRequirement::LAZY_OUTPUT); + iterator.set_read_phase(ColumnIterator::ReadPhase::PREDICATE); + + MutableColumnPtr dst = ColumnInt32::create(); + iterator.convert_to_place_holder_column(dst, 3); + EXPECT_TRUE(iterator._has_place_holder_column); + + IColumn::Filter filter; + filter.resize(3); + filter[0] = 1; + filter[1] = 0; + 
filter[2] = 1; + dst = IColumn::mutate(dst->filter(filter, 2)); + EXPECT_EQ(2, dst->size()); + + iterator.set_read_phase(ColumnIterator::ReadPhase::LAZY); + iterator.finalize_lazy_phase(dst); + EXPECT_EQ(0, dst->size()); + EXPECT_FALSE(iterator._has_place_holder_column); + + dst->insert_many_defaults(2); + iterator.set_read_phase(ColumnIterator::ReadPhase::PREDICATE); + iterator.set_read_phase(ColumnIterator::ReadPhase::LAZY); + iterator.finalize_lazy_phase(dst); + EXPECT_EQ(2, dst->size()); +} + +TEST_F(ColumnReaderTest, SetReadRequirementPropagatesToNestedIterators) { + auto null_iter = std::make_unique(std::make_shared()); + std::vector struct_sub_iters; + + auto sub_col = std::make_unique(std::make_shared()); + sub_col->set_column_name("sub_col"); + struct_sub_iters.emplace_back(std::move(sub_col)); + + auto array_item = std::make_unique(std::make_shared()); + array_item->set_column_name("item"); + auto array_offsets = std::make_unique( + std::make_unique(std::make_shared())); + auto array_null = std::make_unique(std::make_shared()); + auto array_iter = std::make_unique( + std::make_shared(), std::move(array_offsets), std::move(array_item), + std::move(array_null)); + array_iter->set_column_name("arr"); + struct_sub_iters.emplace_back(std::move(array_iter)); + + StructFileColumnIterator struct_iter(std::make_shared(), std::move(null_iter), + std::move(struct_sub_iters)); + struct_iter.set_read_requirement(ColumnIterator::ReadRequirement::PREDICATE); + + EXPECT_EQ(struct_iter.read_requirement(), ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(struct_iter._sub_column_iterators[0]->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + + auto* nested_array = + static_cast(struct_iter._sub_column_iterators[1].get()); + EXPECT_EQ(nested_array->_read_requirement, ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(nested_array->_item_iterator->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + + auto map_null_iter = 
std::make_unique(std::make_shared()); + auto map_offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto map_key_iter = std::make_unique(std::make_shared()); + auto map_val_iter = std::make_unique(std::make_shared()); + MapFileColumnIterator map_iter(std::make_shared(), std::move(map_null_iter), + std::move(map_offsets_iter), std::move(map_key_iter), + std::move(map_val_iter)); + map_iter.set_read_requirement(ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(map_iter._key_iterator->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(map_iter._val_iterator->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); +} + +TEST_F(ColumnReaderTest, SetReadRequirementSelfKeepsNestedIteratorRequirements) { + auto null_iter = std::make_unique(std::make_shared()); + std::vector struct_sub_iters; + auto sub_iter = std::make_unique(std::make_shared()); + sub_iter->set_column_name("sub_col"); + struct_sub_iters.emplace_back(std::move(sub_iter)); + + StructFileColumnIterator struct_iter(std::make_shared(), std::move(null_iter), + std::move(struct_sub_iters)); + struct_iter.set_read_requirement_self(ColumnIterator::ReadRequirement::PREDICATE); + + EXPECT_EQ(struct_iter.read_requirement(), ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(struct_iter._sub_column_iterators[0]->_read_requirement, + ColumnIterator::ReadRequirement::NORMAL); +} + +TEST_F(ColumnReaderTest, RemovePrunedSubIterators) { + auto struct_null_iter = std::make_unique(std::make_shared()); + std::vector struct_sub_iters; + auto sub_keep = std::make_unique(std::make_shared()); + sub_keep->set_column_name("keep"); + auto sub_prune = std::make_unique(std::make_shared()); + sub_prune->set_column_name("prune"); + sub_prune->set_read_requirement(ColumnIterator::ReadRequirement::SKIP); + struct_sub_iters.emplace_back(std::move(sub_keep)); + struct_sub_iters.emplace_back(std::move(sub_prune)); + + auto array_item_null = 
std::make_unique(std::make_shared()); + std::vector item_struct_sub_iters; + auto item_keep = std::make_unique(std::make_shared()); + item_keep->set_column_name("keep"); + auto item_prune = std::make_unique(std::make_shared()); + item_prune->set_column_name("prune"); + item_prune->set_read_requirement(ColumnIterator::ReadRequirement::SKIP); + item_struct_sub_iters.emplace_back(std::move(item_keep)); + item_struct_sub_iters.emplace_back(std::move(item_prune)); + auto item_struct = std::make_unique(std::make_shared(), + std::move(array_item_null), + std::move(item_struct_sub_iters)); + + auto array_offsets = std::make_unique( + std::make_unique(std::make_shared())); + auto array_null = std::make_unique(std::make_shared()); + auto array_iter = std::make_unique( + std::make_shared(), std::move(array_offsets), std::move(item_struct), + std::move(array_null)); + struct_sub_iters.emplace_back(std::move(array_iter)); + + StructFileColumnIterator struct_iter(std::make_shared(), + std::move(struct_null_iter), std::move(struct_sub_iters)); + ASSERT_EQ(3, struct_iter._sub_column_iterators.size()); + struct_iter.remove_pruned_sub_iterators(); + ASSERT_EQ(2, struct_iter._sub_column_iterators.size()); + + auto* nested_array = + static_cast(struct_iter._sub_column_iterators[1].get()); + auto* nested_struct = + static_cast(nested_array->_item_iterator.get()); + ASSERT_EQ(1, nested_struct->_sub_column_iterators.size()); + EXPECT_EQ(nested_struct->_sub_column_iterators[0]->column_name(), "keep"); +} + +TEST_F(ColumnReaderTest, FinalizeLazyModeOnNestedStruct) { + auto sub_iter = std::make_unique(); + auto* sub_iter_ptr = sub_iter.get(); + auto null_iter = std::make_unique(std::make_shared()); + std::vector sub_iters; + sub_iters.emplace_back(std::move(sub_iter)); + + StructFileColumnIterator struct_iter(std::make_shared(), std::move(null_iter), + std::move(sub_iters)); + sub_iter_ptr->set_read_requirement(ColumnIterator::ReadRequirement::LAZY_OUTPUT); + 
struct_iter.set_read_phase(ColumnIterator::ReadPhase::PREDICATE); + sub_iter_ptr->set_read_phase(ColumnIterator::ReadPhase::PREDICATE); + + MutableColumnPtr nested_column = ColumnInt32::create(); + MutableColumnPtr nested_mut = IColumn::mutate(std::move(nested_column)); + sub_iter_ptr->convert_to_place_holder_column(nested_mut, 5); + EXPECT_EQ(5, nested_mut->size()); + + Columns struct_columns; + struct_columns.emplace_back(std::move(nested_mut)); + auto struct_column = ColumnStruct::create(struct_columns); + MutableColumnPtr struct_mut = std::move(struct_column); + struct_iter.set_read_phase(ColumnIterator::ReadPhase::LAZY); + sub_iter_ptr->set_read_phase(ColumnIterator::ReadPhase::LAZY); + struct_iter.finalize_lazy_phase(struct_mut); + + auto& column_struct = assert_cast(*struct_mut); + auto nested_after = column_struct.get_column_ptr(0); + EXPECT_EQ(0, nested_after->size()); +} + +TEST_F(ColumnReaderTest, GetSubAccessPathsSetsPredicateFlag) { + TestColumnIterator iterator; + iterator.set_column_name("self"); + + TColumnAccessPaths access_paths; + access_paths.emplace_back(); + access_paths[0].data_access_path.path = {"self"}; + + iterator.force_set_read_requirement(ColumnIterator::ReadRequirement::NORMAL); + auto sub_paths = TEST_TRY(iterator.get_sub_access_paths(access_paths)); + EXPECT_TRUE(sub_paths.empty()); + EXPECT_EQ(iterator._read_requirement, ColumnIterator::ReadRequirement::LAZY_OUTPUT); + + iterator.force_set_read_requirement(ColumnIterator::ReadRequirement::NORMAL); + sub_paths = TEST_TRY(iterator.get_sub_access_paths(access_paths, true)); + EXPECT_TRUE(sub_paths.empty()); + EXPECT_EQ(iterator._read_requirement, ColumnIterator::ReadRequirement::PREDICATE); +} + +TEST_F(ColumnReaderTest, NestedIteratorsPropagateReadPhase) { + auto struct_null_iterator = + std::make_unique(std::make_shared()); + std::vector struct_sub_iters; + struct_sub_iters.emplace_back( + std::make_unique(std::make_shared())); + struct_sub_iters.emplace_back( + 
std::make_unique(std::make_shared())); + auto struct_iterator = std::make_unique( + std::make_shared(), std::move(struct_null_iterator), + std::move(struct_sub_iters)); + + struct_iterator->set_read_phase(ColumnIterator::ReadPhase::LAZY); + EXPECT_EQ(struct_iterator->_sub_column_iterators[0]->_read_phase, + ColumnIterator::ReadPhase::LAZY); + EXPECT_EQ(struct_iterator->_sub_column_iterators[1]->_read_phase, + ColumnIterator::ReadPhase::LAZY); + + auto array_item_iterator = + std::make_unique(std::make_shared()); + auto array_offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto array_null_iter = std::make_unique(std::make_shared()); + ArrayFileColumnIterator array_iterator( + std::make_shared(), std::move(array_offsets_iter), + std::move(array_item_iterator), std::move(array_null_iter)); + array_iterator.set_read_phase(ColumnIterator::ReadPhase::PREDICATE); + EXPECT_EQ(array_iterator._item_iterator->_read_phase, ColumnIterator::ReadPhase::PREDICATE); + + auto map_null_iter = std::make_unique(std::make_shared()); + auto map_offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto map_key_iter = std::make_unique(std::make_shared()); + auto map_val_iter = std::make_unique(std::make_shared()); + MapFileColumnIterator map_iterator(std::make_shared(), std::move(map_null_iter), + std::move(map_offsets_iter), std::move(map_key_iter), + std::move(map_val_iter)); + map_iterator.set_read_phase(ColumnIterator::ReadPhase::LAZY); + EXPECT_EQ(map_iterator._key_iterator->_read_phase, ColumnIterator::ReadPhase::LAZY); + EXPECT_EQ(map_iterator._val_iterator->_read_phase, ColumnIterator::ReadPhase::LAZY); +} + +TEST_F(ColumnReaderTest, AccessPathsPropagatePredicateToChildren) { + auto struct_null_iterator = + std::make_unique(std::make_shared()); + std::vector struct_sub_iters; + struct_sub_iters.emplace_back( + std::make_unique(std::make_shared())); + struct_sub_iters.emplace_back( + std::make_unique(std::make_shared())); + auto 
struct_iterator = std::make_unique( + std::make_shared(), std::move(struct_null_iterator), + std::move(struct_sub_iters)); + struct_iterator->set_column_name("s"); + + TColumnAccessPaths all_access_paths; + all_access_paths.emplace_back(); + all_access_paths[0].data_access_path.path = {"s"}; + TColumnAccessPaths predicate_access_paths = all_access_paths; + + auto st = struct_iterator->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set struct access paths: " << st.to_string(); + EXPECT_EQ(struct_iterator->_read_requirement, ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(struct_iterator->_sub_column_iterators[0]->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(struct_iterator->_sub_column_iterators[1]->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + + auto array_item_iterator = + std::make_unique(std::make_shared()); + auto array_offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto array_null_iter = std::make_unique(std::make_shared()); + ArrayFileColumnIterator array_iterator( + std::make_shared(), std::move(array_offsets_iter), + std::move(array_item_iterator), std::move(array_null_iter)); + array_iterator.set_column_name("a"); + TColumnAccessPaths array_access_paths; + array_access_paths.emplace_back(); + array_access_paths[0].data_access_path.path = {"a"}; + TColumnAccessPaths array_predicate_paths = array_access_paths; + st = array_iterator.set_access_paths(array_access_paths, array_predicate_paths); + ASSERT_TRUE(st.ok()) << "failed to set array access paths: " << st.to_string(); + EXPECT_EQ(array_iterator._read_requirement, ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(array_iterator._item_iterator->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + + auto map_null_iter = std::make_unique(std::make_shared()); + auto map_offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + 
auto map_key_iter = std::make_unique(std::make_shared()); + auto map_val_iter = std::make_unique(std::make_shared()); + MapFileColumnIterator map_iterator(std::make_shared(), std::move(map_null_iter), + std::move(map_offsets_iter), std::move(map_key_iter), + std::move(map_val_iter)); + map_iterator.set_column_name("m"); + TColumnAccessPaths map_access_paths; + map_access_paths.emplace_back(); + map_access_paths[0].data_access_path.path = {"m"}; + TColumnAccessPaths map_predicate_paths = map_access_paths; + st = map_iterator.set_access_paths(map_access_paths, map_predicate_paths); + ASSERT_TRUE(st.ok()) << "failed to set map access paths: " << st.to_string(); + EXPECT_EQ(map_iterator._read_requirement, ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(map_iterator._key_iterator->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(map_iterator._val_iterator->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); +} + +TEST_F(ColumnReaderTest, StructPredicateOnlyChildPathStillRoutesToChild) { + auto null_iter = std::make_unique(std::make_shared()); + std::vector sub_iters; + auto sub_a = std::make_unique(std::make_shared()); + sub_a->set_column_name("a"); + auto sub_b = std::make_unique(std::make_shared()); + sub_b->set_column_name("b"); + sub_iters.emplace_back(std::move(sub_a)); + sub_iters.emplace_back(std::move(sub_b)); + + StructFileColumnIterator struct_iterator(std::make_shared(), std::move(null_iter), + std::move(sub_iters)); + struct_iterator.set_column_name("s"); + + TColumnAccessPaths all_access_paths {create_access_path({"s", "a"})}; + TColumnAccessPaths predicate_access_paths {create_access_path({"s", "b"})}; + + auto st = struct_iterator.set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set struct access paths: " << st.to_string(); + + EXPECT_EQ(struct_iterator._read_requirement, ColumnIterator::ReadRequirement::PREDICATE); + 
EXPECT_EQ(struct_iterator._sub_column_iterators[0]->_read_requirement, + ColumnIterator::ReadRequirement::LAZY_OUTPUT); + EXPECT_EQ(struct_iterator._sub_column_iterators[1]->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + + struct_iterator.remove_pruned_sub_iterators(); + ASSERT_EQ(struct_iterator._sub_column_iterators.size(), 2); + EXPECT_EQ(struct_iterator._sub_column_iterators[0]->column_name(), "a"); + EXPECT_EQ(struct_iterator._sub_column_iterators[1]->column_name(), "b"); +} + +TEST_F(ColumnReaderTest, CurrentLevelPredicateNullPathUsesMetaOnlyMode) { + auto make_struct_iterator = []() { + auto null_iter = std::make_unique(std::make_shared()); + std::vector sub_iters; + auto sub_a = std::make_unique(std::make_shared()); + sub_a->set_column_name("a"); + auto sub_b = std::make_unique(std::make_shared()); + sub_b->set_column_name("b"); + sub_iters.emplace_back(std::move(sub_a)); + sub_iters.emplace_back(std::move(sub_b)); + + auto struct_iterator = std::make_unique( + std::make_shared(), std::move(null_iter), std::move(sub_iters)); + struct_iterator->set_column_name("s"); + return struct_iterator; + }; + + { + auto struct_iterator = make_struct_iterator(); + TColumnAccessPaths all_access_paths { + create_access_path({"s", ColumnIterator::ACCESS_NULL})}; + TColumnAccessPaths predicate_access_paths { + create_access_path({"s", ColumnIterator::ACCESS_NULL})}; + + auto st = struct_iterator->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set struct access paths: " << st.to_string(); + + EXPECT_TRUE(struct_iterator->read_null_map_only()); + EXPECT_EQ(struct_iterator->_sub_column_iterators[0]->_read_requirement, + ColumnIterator::ReadRequirement::SKIP); + EXPECT_EQ(struct_iterator->_sub_column_iterators[1]->_read_requirement, + ColumnIterator::ReadRequirement::SKIP); + + struct_iterator->remove_pruned_sub_iterators(); + EXPECT_TRUE(struct_iterator->_sub_column_iterators.empty()); + } + + { + auto 
struct_iterator = make_struct_iterator(); + TColumnAccessPaths all_access_paths { + create_access_path({"s"}), create_access_path({"s", ColumnIterator::ACCESS_NULL})}; + TColumnAccessPaths predicate_access_paths { + create_access_path({"s", ColumnIterator::ACCESS_NULL})}; + + auto st = struct_iterator->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set struct access paths: " << st.to_string(); + + EXPECT_FALSE(struct_iterator->read_null_map_only()); + EXPECT_EQ(struct_iterator->_sub_column_iterators[0]->_read_requirement, + ColumnIterator::ReadRequirement::LAZY_OUTPUT); + EXPECT_EQ(struct_iterator->_sub_column_iterators[1]->_read_requirement, + ColumnIterator::ReadRequirement::LAZY_OUTPUT); + } + + { + auto array_item_iterator = + std::make_unique(std::make_shared()); + auto array_offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto array_null_iter = + std::make_unique(std::make_shared()); + ArrayFileColumnIterator array_iterator( + std::make_shared(), std::move(array_offsets_iter), + std::move(array_item_iterator), std::move(array_null_iter)); + array_iterator.set_column_name("a"); + + TColumnAccessPaths all_access_paths { + create_access_path({"a", ColumnIterator::ACCESS_NULL})}; + TColumnAccessPaths predicate_access_paths { + create_access_path({"a", ColumnIterator::ACCESS_NULL})}; + + auto st = array_iterator.set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set array access paths: " << st.to_string(); + EXPECT_TRUE(array_iterator.read_null_map_only()); + EXPECT_EQ(array_iterator._item_iterator->_read_requirement, + ColumnIterator::ReadRequirement::SKIP); + } + + { + auto map_null_iter = std::make_unique(std::make_shared()); + auto map_offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto map_key_iter = std::make_unique(std::make_shared()); + auto map_val_iter = std::make_unique(std::make_shared()); + 
MapFileColumnIterator map_iterator(std::make_shared(), + std::move(map_null_iter), std::move(map_offsets_iter), + std::move(map_key_iter), std::move(map_val_iter)); + map_iterator.set_column_name("m"); + + TColumnAccessPaths all_access_paths { + create_access_path({"m", ColumnIterator::ACCESS_NULL})}; + TColumnAccessPaths predicate_access_paths { + create_access_path({"m", ColumnIterator::ACCESS_NULL})}; + + auto st = map_iterator.set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set map access paths: " << st.to_string(); + EXPECT_TRUE(map_iterator.read_null_map_only()); + EXPECT_EQ(map_iterator._key_iterator->_read_requirement, + ColumnIterator::ReadRequirement::SKIP); + EXPECT_EQ(map_iterator._val_iterator->_read_requirement, + ColumnIterator::ReadRequirement::SKIP); + } +} + +TEST_F(ColumnReaderTest, StructPredicateMetaPathDoesNotOverrideExistingDataNeed) { + auto make_struct_iterator = []() { + auto null_iter = std::make_unique(std::make_shared()); + std::vector sub_iters; + auto city_iter = + std::make_unique(std::make_shared()); + city_iter->set_column_name("city"); + auto data_iter = std::make_unique(std::make_shared()); + data_iter->set_column_name("data"); + sub_iters.emplace_back(std::move(city_iter)); + sub_iters.emplace_back(std::move(data_iter)); + auto struct_iterator = std::make_unique( + std::make_shared(), std::move(null_iter), std::move(sub_iters)); + struct_iterator->set_column_name("s"); + return struct_iterator; + }; + + auto struct_iterator = make_struct_iterator(); + TColumnAccessPaths all_access_paths { + create_access_path({"s"}), + create_access_path({"s", "city", ColumnIterator::ACCESS_NULL})}; + TColumnAccessPaths predicate_access_paths { + create_access_path({"s", "city", ColumnIterator::ACCESS_NULL})}; + + auto st = struct_iterator->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set struct access paths: " << st.to_string(); + + auto* 
city_iter = + static_cast(struct_iterator->_sub_column_iterators[0].get()); + EXPECT_FALSE(city_iter->read_null_map_only()); + EXPECT_EQ(city_iter->read_requirement(), ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(struct_iterator->_sub_column_iterators[1]->read_requirement(), + ColumnIterator::ReadRequirement::LAZY_OUTPUT); + + struct_iterator = make_struct_iterator(); + all_access_paths = {create_access_path({"s", "city", ColumnIterator::ACCESS_NULL})}; + predicate_access_paths = all_access_paths; + st = struct_iterator->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set predicate-only struct access paths: " << st.to_string(); + city_iter = + static_cast(struct_iterator->_sub_column_iterators[0].get()); + EXPECT_TRUE(city_iter->read_null_map_only()); + EXPECT_EQ(struct_iterator->_sub_column_iterators[1]->read_requirement(), + ColumnIterator::ReadRequirement::SKIP); +} + +TEST_F(ColumnReaderTest, ArrayPredicateMetaPathDoesNotOverrideExistingDataNeed) { + auto make_array_iterator = []() { + auto item_iter = + std::make_unique(std::make_shared()); + item_iter->set_column_name("item"); + auto offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto null_iter = std::make_unique(std::make_shared()); + auto array_iterator = std::make_unique( + std::make_shared(), std::move(offsets_iter), std::move(item_iter), + std::move(null_iter)); + array_iterator->set_column_name("a"); + return array_iterator; + }; + + auto array_iterator = make_array_iterator(); + TColumnAccessPaths all_access_paths { + create_access_path({"a"}), + create_access_path({"a", ColumnIterator::ACCESS_ALL, ColumnIterator::ACCESS_NULL})}; + TColumnAccessPaths predicate_access_paths { + create_access_path({"a", ColumnIterator::ACCESS_ALL, ColumnIterator::ACCESS_NULL})}; + + auto st = array_iterator->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set array access paths: " << 
st.to_string(); + auto* item_iter = static_cast(array_iterator->_item_iterator.get()); + EXPECT_FALSE(item_iter->read_null_map_only()); + EXPECT_EQ(item_iter->read_requirement(), ColumnIterator::ReadRequirement::PREDICATE); - ASSERT_EQ(iterator->_sub_column_iterators[0]->_reading_flag, - ColumnIterator::ReadingFlag::READING_FOR_PREDICATE); - ASSERT_EQ(iterator->_sub_column_iterators[1]->_reading_flag, - ColumnIterator::ReadingFlag::NEED_TO_READ); + array_iterator = make_array_iterator(); + all_access_paths = { + create_access_path({"a", ColumnIterator::ACCESS_ALL, ColumnIterator::ACCESS_NULL})}; + predicate_access_paths = all_access_paths; + st = array_iterator->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set predicate-only array access paths: " << st.to_string(); + item_iter = static_cast(array_iterator->_item_iterator.get()); + EXPECT_TRUE(item_iter->read_null_map_only()); +} + +TEST_F(ColumnReaderTest, MapPredicateMetaPathDoesNotOverrideExistingDataNeed) { + auto make_map_iterator = []() { + auto null_iter = std::make_unique(std::make_shared()); + auto offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto key_iter = std::make_unique(std::make_shared()); + key_iter->set_column_name("key"); + auto value_iter = + std::make_unique(std::make_shared()); + value_iter->set_column_name("value"); + auto map_iterator = std::make_unique( + std::make_shared(), std::move(null_iter), std::move(offsets_iter), + std::move(key_iter), std::move(value_iter)); + map_iterator->set_column_name("m"); + return map_iterator; + }; + + auto map_iterator = make_map_iterator(); + TColumnAccessPaths all_access_paths {create_access_path({"m"}), + create_access_path({"m", ColumnIterator::ACCESS_MAP_VALUES, + ColumnIterator::ACCESS_NULL})}; + TColumnAccessPaths predicate_access_paths {create_access_path( + {"m", ColumnIterator::ACCESS_MAP_VALUES, ColumnIterator::ACCESS_NULL})}; + + auto st = 
map_iterator->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set map access paths: " << st.to_string(); + auto* value_iter = static_cast(map_iterator->_val_iterator.get()); + EXPECT_FALSE(value_iter->read_null_map_only()); + EXPECT_EQ(map_iterator->_key_iterator->read_requirement(), + ColumnIterator::ReadRequirement::LAZY_OUTPUT); + EXPECT_EQ(value_iter->read_requirement(), ColumnIterator::ReadRequirement::PREDICATE); + + map_iterator = make_map_iterator(); + all_access_paths = {create_access_path( + {"m", ColumnIterator::ACCESS_MAP_VALUES, ColumnIterator::ACCESS_NULL})}; + predicate_access_paths = all_access_paths; + st = map_iterator->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set predicate-only map access paths: " << st.to_string(); + value_iter = static_cast(map_iterator->_val_iterator.get()); + EXPECT_TRUE(value_iter->read_null_map_only()); + EXPECT_EQ(map_iterator->_key_iterator->read_requirement(), + ColumnIterator::ReadRequirement::SKIP); +} + +TEST_F(ColumnReaderTest, MapFullProjectionStillRoutesPredicateSubPaths) { + auto make_value_struct = []() { + auto null_iter = std::make_unique(std::make_shared()); + std::vector sub_iters; + auto sub_a = std::make_unique(std::make_shared()); + sub_a->set_column_name("a"); + auto sub_b = std::make_unique(std::make_shared()); + sub_b->set_column_name("b"); + sub_iters.emplace_back(std::move(sub_a)); + sub_iters.emplace_back(std::move(sub_b)); + + auto value_struct = std::make_unique( + std::make_shared(), std::move(null_iter), std::move(sub_iters)); + value_struct->set_column_name("value"); + return value_struct; + }; + + auto map_null_iter = std::make_unique(std::make_shared()); + auto map_offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto map_key_iter = std::make_unique(std::make_shared()); + map_key_iter->set_column_name("key"); + auto map_iterator = std::make_unique( + 
std::make_shared(), std::move(map_null_iter), std::move(map_offsets_iter), + std::move(map_key_iter), make_value_struct()); + map_iterator->set_column_name("m"); + + TColumnAccessPaths all_access_paths; + all_access_paths.emplace_back(); + all_access_paths[0].data_access_path.path = {"m"}; + + TColumnAccessPaths predicate_access_paths; + predicate_access_paths.emplace_back(); + predicate_access_paths[0].data_access_path.path = {"m", "KEYS"}; + predicate_access_paths.emplace_back(); + predicate_access_paths[1].data_access_path.path = {"m", "VALUES", "a"}; + + auto st = map_iterator->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set map access paths: " << st.to_string(); + + EXPECT_EQ(map_iterator->_read_requirement, ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(map_iterator->_key_iterator->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(map_iterator->_val_iterator->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + + auto* value_struct = static_cast(map_iterator->_val_iterator.get()); + EXPECT_EQ(value_struct->_sub_column_iterators[0]->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(value_struct->_sub_column_iterators[1]->_read_requirement, + ColumnIterator::ReadRequirement::LAZY_OUTPUT); +} + +TEST_F(ColumnReaderTest, MetaOnlyAllPathsStillRoutePredicateSubPaths) { + { + auto array_item_iterator = + std::make_unique(std::make_shared()); + auto array_offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto array_null_iter = + std::make_unique(std::make_shared()); + ArrayFileColumnIterator array_iterator( + std::make_shared(), std::move(array_offsets_iter), + std::move(array_item_iterator), std::move(array_null_iter)); + array_iterator.set_column_name("a"); + + TColumnAccessPaths all_access_paths { + create_access_path({"a", ColumnIterator::ACCESS_OFFSET}), + create_access_path({"a", 
ColumnIterator::ACCESS_NULL})}; + TColumnAccessPaths predicate_access_paths { + create_access_path({"a", ColumnIterator::ACCESS_ALL})}; + + auto st = array_iterator.set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set array access paths: " << st.to_string(); + EXPECT_FALSE(array_iterator.read_offset_only()); + EXPECT_EQ(array_iterator._item_iterator->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + } + + { + auto map_null_iter = std::make_unique(std::make_shared()); + auto map_offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto map_key_iter = std::make_unique(std::make_shared()); + auto map_val_iter = std::make_unique(std::make_shared()); + MapFileColumnIterator map_iterator(std::make_shared(), + std::move(map_null_iter), std::move(map_offsets_iter), + std::move(map_key_iter), std::move(map_val_iter)); + map_iterator.set_column_name("m"); + + TColumnAccessPaths all_access_paths { + create_access_path({"m", ColumnIterator::ACCESS_OFFSET}), + create_access_path({"m", ColumnIterator::ACCESS_NULL})}; + TColumnAccessPaths predicate_access_paths { + create_access_path({"m", ColumnIterator::ACCESS_MAP_KEYS})}; + + auto st = map_iterator.set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set map access paths: " << st.to_string(); + EXPECT_FALSE(map_iterator.read_offset_only()); + EXPECT_EQ(map_iterator._key_iterator->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(map_iterator._val_iterator->_read_requirement, + ColumnIterator::ReadRequirement::SKIP); + } +} + +TEST_F(ColumnReaderTest, NestedStructArrayMapStructAccessPaths) { + auto make_value_struct = []() { + auto null_iter = std::make_unique(std::make_shared()); + std::vector sub_iters; + auto sub_a = std::make_unique(std::make_shared()); + sub_a->set_column_name("a"); + auto sub_b = std::make_unique(std::make_shared()); + sub_b->set_column_name("b"); 
+ sub_iters.emplace_back(std::move(sub_a)); + sub_iters.emplace_back(std::move(sub_b)); + + auto value_struct = std::make_unique( + std::make_shared(), std::move(null_iter), std::move(sub_iters)); + value_struct->set_column_name("value"); + return value_struct; + }; + + auto map_null_iter = std::make_unique(std::make_shared()); + auto map_offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto map_key_iter = std::make_unique(std::make_shared()); + map_key_iter->set_column_name("key"); + auto map_val_iter = make_value_struct(); + auto map_iterator = std::make_unique( + std::make_shared(), std::move(map_null_iter), std::move(map_offsets_iter), + std::move(map_key_iter), std::move(map_val_iter)); + map_iterator->set_column_name("item"); + + auto array_null_iter = std::make_unique(std::make_shared()); + auto array_offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto array_iterator = std::make_unique( + std::make_shared(), std::move(array_offsets_iter), + std::move(map_iterator), std::move(array_null_iter)); + array_iterator->set_column_name("col2"); + + auto struct_null_iter = std::make_unique(std::make_shared()); + std::vector struct_sub_iters; + auto sub_col1 = std::make_unique(std::make_shared()); + sub_col1->set_column_name("col1"); + struct_sub_iters.emplace_back(std::move(sub_col1)); + struct_sub_iters.emplace_back(std::move(array_iterator)); + auto top_struct = std::make_unique(std::make_shared(), + std::move(struct_null_iter), + std::move(struct_sub_iters)); + top_struct->set_column_name("root"); + + TColumnAccessPaths access_paths; + access_paths.emplace_back(); + access_paths[0].data_access_path.path = {"root", "col2", "*", "VALUES", "a"}; + TColumnAccessPaths predicate_access_paths = access_paths; + + auto st = top_struct->set_access_paths(access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set nested access paths: " << st.to_string(); + + EXPECT_EQ(top_struct->_read_requirement, 
ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(top_struct->_sub_column_iterators[0]->_read_requirement, + ColumnIterator::ReadRequirement::SKIP); + EXPECT_EQ(top_struct->_sub_column_iterators[1]->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + + auto* array_iter = + static_cast(top_struct->_sub_column_iterators[1].get()); + auto* map_iter = static_cast(array_iter->_item_iterator.get()); + EXPECT_EQ(map_iter->_key_iterator->_read_requirement, ColumnIterator::ReadRequirement::SKIP); + EXPECT_EQ(map_iter->_val_iterator->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + + auto* value_struct = static_cast(map_iter->_val_iterator.get()); + EXPECT_EQ(value_struct->_sub_column_iterators[0]->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(value_struct->_sub_column_iterators[1]->_read_requirement, + ColumnIterator::ReadRequirement::SKIP); +} + +TEST_F(ColumnReaderTest, NestedStructArrayMapStructAccessPathsVariants) { + auto build_nested_iterator = []() { + auto make_value_struct = []() { + auto null_iter = std::make_unique(std::make_shared()); + std::vector sub_iters; + auto sub_a = std::make_unique(std::make_shared()); + sub_a->set_column_name("a"); + auto sub_b = std::make_unique(std::make_shared()); + sub_b->set_column_name("b"); + sub_iters.emplace_back(std::move(sub_a)); + sub_iters.emplace_back(std::move(sub_b)); + + auto value_struct = std::make_unique( + std::make_shared(), std::move(null_iter), std::move(sub_iters)); + value_struct->set_column_name("value"); + return value_struct; + }; + + auto map_null_iter = std::make_unique(std::make_shared()); + auto map_offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto map_key_iter = std::make_unique(std::make_shared()); + map_key_iter->set_column_name("key"); + auto map_val_iter = make_value_struct(); + auto map_iterator = std::make_unique( + std::make_shared(), std::move(map_null_iter), + std::move(map_offsets_iter), 
std::move(map_key_iter), std::move(map_val_iter)); + map_iterator->set_column_name("item"); + + auto array_null_iter = + std::make_unique(std::make_shared()); + auto array_offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto array_iterator = std::make_unique( + std::make_shared(), std::move(array_offsets_iter), + std::move(map_iterator), std::move(array_null_iter)); + array_iterator->set_column_name("col2"); + + auto struct_null_iter = + std::make_unique(std::make_shared()); + std::vector struct_sub_iters; + auto sub_col1 = std::make_unique(std::make_shared()); + sub_col1->set_column_name("col1"); + struct_sub_iters.emplace_back(std::move(sub_col1)); + struct_sub_iters.emplace_back(std::move(array_iterator)); + auto top_struct = std::make_unique( + std::make_shared(), std::move(struct_null_iter), + std::move(struct_sub_iters)); + top_struct->set_column_name("root"); + return top_struct; + }; + + { + auto top_struct = build_nested_iterator(); + TColumnAccessPaths all_access_paths; + all_access_paths.emplace_back(); + all_access_paths[0].data_access_path.path = {"root", "col1"}; + TColumnAccessPaths predicate_access_paths; + + auto st = top_struct->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string(); + + EXPECT_EQ(top_struct->_read_requirement, ColumnIterator::ReadRequirement::LAZY_OUTPUT); + EXPECT_EQ(top_struct->_sub_column_iterators[0]->_read_requirement, + ColumnIterator::ReadRequirement::LAZY_OUTPUT); + EXPECT_EQ(top_struct->_sub_column_iterators[1]->_read_requirement, + ColumnIterator::ReadRequirement::SKIP); + } + + { + auto top_struct = build_nested_iterator(); + TColumnAccessPaths all_access_paths; + all_access_paths.emplace_back(); + all_access_paths[0].data_access_path.path = {"root", "col2", "*", "KEYS"}; + TColumnAccessPaths predicate_access_paths; + predicate_access_paths.emplace_back(); + predicate_access_paths[0].data_access_path.path = 
{"root", "col2", "*", "VALUES", "b"}; + + auto st = top_struct->set_access_paths(all_access_paths, predicate_access_paths); + EXPECT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string(); + } + + { + auto top_struct = build_nested_iterator(); + TColumnAccessPaths all_access_paths; + all_access_paths.emplace_back(); + all_access_paths[0].data_access_path.path = {"root", "col2"}; + TColumnAccessPaths predicate_access_paths = all_access_paths; + + auto st = top_struct->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string(); + + EXPECT_EQ(top_struct->_read_requirement, ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(top_struct->_sub_column_iterators[1]->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + + auto* array_iter = + static_cast(top_struct->_sub_column_iterators[1].get()); + auto* map_iter = static_cast(array_iter->_item_iterator.get()); + EXPECT_EQ(map_iter->_key_iterator->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(map_iter->_val_iterator->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + + auto* value_struct = static_cast(map_iter->_val_iterator.get()); + EXPECT_EQ(value_struct->_sub_column_iterators[0]->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(value_struct->_sub_column_iterators[1]->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + } + + { + auto top_struct = build_nested_iterator(); + TColumnAccessPaths all_access_paths; + TColumnAccessPaths predicate_access_paths; + predicate_access_paths.emplace_back(); + predicate_access_paths[0].data_access_path.path = {"root", "col2", "*", "VALUES", "a"}; + + auto st = top_struct->set_access_paths(all_access_paths, predicate_access_paths); + EXPECT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string(); + } + { + auto top_struct = build_nested_iterator(); + TColumnAccessPaths 
all_access_paths; + all_access_paths.emplace_back(); + all_access_paths[0].data_access_path.path = {"root", "col2", "*", "KEYS"}; + TColumnAccessPaths predicate_access_paths; + + auto st = top_struct->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string(); + + auto* array_iter = + static_cast(top_struct->_sub_column_iterators[1].get()); + auto* map_iter = static_cast(array_iter->_item_iterator.get()); + EXPECT_EQ(map_iter->_key_iterator->_read_requirement, + ColumnIterator::ReadRequirement::LAZY_OUTPUT); + EXPECT_EQ(map_iter->_val_iterator->_read_requirement, + ColumnIterator::ReadRequirement::SKIP); + } + + { + auto top_struct = build_nested_iterator(); + TColumnAccessPaths all_access_paths; + all_access_paths.emplace_back(); + all_access_paths[0].data_access_path.path = {"root", "col2", "*", "VALUES"}; + TColumnAccessPaths predicate_access_paths; + + auto st = top_struct->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string(); + + auto* array_iter = + static_cast(top_struct->_sub_column_iterators[1].get()); + auto* map_iter = static_cast(array_iter->_item_iterator.get()); + EXPECT_EQ(map_iter->_key_iterator->_read_requirement, + ColumnIterator::ReadRequirement::SKIP); + EXPECT_EQ(map_iter->_val_iterator->_read_requirement, + ColumnIterator::ReadRequirement::LAZY_OUTPUT); + + auto* value_struct = static_cast(map_iter->_val_iterator.get()); + EXPECT_EQ(value_struct->_sub_column_iterators[0]->_read_requirement, + ColumnIterator::ReadRequirement::LAZY_OUTPUT); + EXPECT_EQ(value_struct->_sub_column_iterators[1]->_read_requirement, + ColumnIterator::ReadRequirement::LAZY_OUTPUT); + } + + { + auto top_struct = build_nested_iterator(); + TColumnAccessPaths all_access_paths; + all_access_paths.emplace_back(); + all_access_paths[0].data_access_path.path = {"root", "col2", "*"}; + TColumnAccessPaths 
predicate_access_paths; + + auto st = top_struct->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string(); + + auto* array_iter = + static_cast(top_struct->_sub_column_iterators[1].get()); + auto* map_iter = static_cast(array_iter->_item_iterator.get()); + EXPECT_EQ(map_iter->_key_iterator->_read_requirement, + ColumnIterator::ReadRequirement::LAZY_OUTPUT); + EXPECT_EQ(map_iter->_val_iterator->_read_requirement, + ColumnIterator::ReadRequirement::LAZY_OUTPUT); + + auto* value_struct = static_cast(map_iter->_val_iterator.get()); + EXPECT_EQ(value_struct->_sub_column_iterators[0]->_read_requirement, + ColumnIterator::ReadRequirement::LAZY_OUTPUT); + EXPECT_EQ(value_struct->_sub_column_iterators[1]->_read_requirement, + ColumnIterator::ReadRequirement::LAZY_OUTPUT); + } + + { + auto top_struct = build_nested_iterator(); + TColumnAccessPaths all_access_paths; + all_access_paths.emplace_back(); + all_access_paths[0].data_access_path.path = {"root", "col2", "*", "VALUES", "a"}; + TColumnAccessPaths predicate_access_paths = all_access_paths; + + auto st = top_struct->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string(); + + EXPECT_EQ(top_struct->_read_requirement, ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(top_struct->_sub_column_iterators[1]->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + + auto* array_iter = + static_cast(top_struct->_sub_column_iterators[1].get()); + auto* map_iter = static_cast(array_iter->_item_iterator.get()); + EXPECT_EQ(map_iter->_key_iterator->_read_requirement, + ColumnIterator::ReadRequirement::SKIP); + EXPECT_EQ(map_iter->_val_iterator->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + + auto* value_struct = static_cast(map_iter->_val_iterator.get()); + EXPECT_EQ(value_struct->_sub_column_iterators[0]->_read_requirement, + 
ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_EQ(value_struct->_sub_column_iterators[1]->_read_requirement, + ColumnIterator::ReadRequirement::SKIP); + } + + { + auto top_struct = build_nested_iterator(); + TColumnAccessPaths all_access_paths; + all_access_paths.emplace_back(); + all_access_paths[0].data_access_path.path = {"root", "col2", "*"}; + TColumnAccessPaths predicate_access_paths; + predicate_access_paths.emplace_back(); + predicate_access_paths[0].data_access_path.path = {"root", "col2", "*", "VALUES"}; + + auto st = top_struct->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string(); + + auto* array_iter = + static_cast(top_struct->_sub_column_iterators[1].get()); + auto* map_iter = static_cast(array_iter->_item_iterator.get()); + EXPECT_EQ(map_iter->_key_iterator->_read_requirement, + ColumnIterator::ReadRequirement::LAZY_OUTPUT); + EXPECT_EQ(map_iter->_val_iterator->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + } + + { + auto top_struct = build_nested_iterator(); + TColumnAccessPaths all_access_paths; + all_access_paths.emplace_back(); + all_access_paths[0].data_access_path.path = {}; + TColumnAccessPaths predicate_access_paths; + + auto st = top_struct->set_access_paths(all_access_paths, predicate_access_paths); + EXPECT_FALSE(st.ok()); + } + + { + auto top_struct = build_nested_iterator(); + TColumnAccessPaths all_access_paths; + all_access_paths.emplace_back(); + all_access_paths[0].data_access_path.path = {"wrong_root", "col2"}; + TColumnAccessPaths predicate_access_paths; + + auto st = top_struct->set_access_paths(all_access_paths, predicate_access_paths); + EXPECT_FALSE(st.ok()); + } + + { + auto top_struct = build_nested_iterator(); + TColumnAccessPaths all_access_paths; + all_access_paths.emplace_back(); + all_access_paths[0].data_access_path.path = {"root", "col2", "wrong_item"}; + TColumnAccessPaths predicate_access_paths; + + auto st 
= top_struct->set_access_paths(all_access_paths, predicate_access_paths); + EXPECT_FALSE(st.ok()); + } +} + +TEST_F(ColumnReaderTest, DeepNestedAccessPathsFiveLevels) { + auto make_item_struct = []() { + auto null_iter = std::make_unique(std::make_shared()); + std::vector sub_iters; + auto sub_p = std::make_unique(std::make_shared()); + sub_p->set_column_name("p"); + auto sub_q = std::make_unique(std::make_shared()); + sub_q->set_column_name("q"); + sub_iters.emplace_back(std::move(sub_p)); + sub_iters.emplace_back(std::move(sub_q)); + + auto item_struct = std::make_unique( + std::make_shared(), std::move(null_iter), std::move(sub_iters)); + item_struct->set_column_name("item"); + return item_struct; + }; + + auto make_value_struct = [make_item_struct]() { + auto null_iter = std::make_unique(std::make_shared()); + std::vector sub_iters; + auto array_offsets = std::make_unique( + std::make_unique(std::make_shared())); + auto array_null = std::make_unique(std::make_shared()); + auto array_iter = std::make_unique( + std::make_shared(), std::move(array_offsets), make_item_struct(), + std::move(array_null)); + array_iter->set_column_name("arr"); + sub_iters.emplace_back(std::move(array_iter)); + + auto sub_z = std::make_unique(std::make_shared()); + sub_z->set_column_name("z"); + sub_iters.emplace_back(std::move(sub_z)); + + auto value_struct = std::make_unique( + std::make_shared(), std::move(null_iter), std::move(sub_iters)); + value_struct->set_column_name("value"); + return value_struct; + }; + + auto map_null_iter = std::make_unique(std::make_shared()); + auto map_offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto map_key_iter = std::make_unique(std::make_shared()); + map_key_iter->set_column_name("key"); + auto map_val_iter = make_value_struct(); + auto map_iter = std::make_unique( + std::make_shared(), std::move(map_null_iter), std::move(map_offsets_iter), + std::move(map_key_iter), std::move(map_val_iter)); + 
map_iter->set_column_name("m"); + + auto struct_null_iter = std::make_unique(std::make_shared()); + std::vector struct_sub_iters; + auto sub_x = std::make_unique(std::make_shared()); + sub_x->set_column_name("x"); + struct_sub_iters.emplace_back(std::move(sub_x)); + struct_sub_iters.emplace_back(std::move(map_iter)); + auto top_struct = std::make_unique(std::make_shared(), + std::move(struct_null_iter), + std::move(struct_sub_iters)); + top_struct->set_column_name("root"); + + TColumnAccessPaths all_access_paths; + all_access_paths.emplace_back(); + all_access_paths[0].data_access_path.path = {"root", "m", "VALUES", "arr", "*"}; + TColumnAccessPaths predicate_access_paths; + predicate_access_paths.emplace_back(); + predicate_access_paths[0].data_access_path.path = {"root", "m", "VALUES", "arr", "*", "q"}; + + auto st = top_struct->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set deep access paths: " << st.to_string(); + + auto* map_ptr = static_cast(top_struct->_sub_column_iterators[1].get()); + EXPECT_EQ(map_ptr->_key_iterator->_read_requirement, ColumnIterator::ReadRequirement::SKIP); + EXPECT_EQ(map_ptr->_val_iterator->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); + + auto* value_struct = static_cast(map_ptr->_val_iterator.get()); + auto* array_iter = + static_cast(value_struct->_sub_column_iterators[0].get()); + auto* item_struct = static_cast(array_iter->_item_iterator.get()); + EXPECT_EQ(item_struct->_sub_column_iterators[0]->_read_requirement, + ColumnIterator::ReadRequirement::LAZY_OUTPUT); + EXPECT_EQ(item_struct->_sub_column_iterators[1]->_read_requirement, + ColumnIterator::ReadRequirement::PREDICATE); +} + +TEST_F(ColumnReaderTest, NestedLazyOutputInLazyPredicatePhase) { + auto struct_null_iterator = + std::make_unique(std::make_shared()); + std::vector struct_sub_iters; + struct_sub_iters.emplace_back( + std::make_unique(std::make_shared())); + struct_sub_iters.emplace_back( + 
std::make_unique(std::make_shared())); + StructFileColumnIterator struct_iterator(std::make_shared(), + std::move(struct_null_iterator), + std::move(struct_sub_iters)); + struct_iterator.set_read_phase(ColumnIterator::ReadPhase::LAZY); + struct_iterator.set_read_requirement_self(ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_FALSE(struct_iterator.has_lazy_read_target()); + EXPECT_FALSE(struct_iterator.need_to_read()); + struct_iterator._sub_column_iterators[0]->set_lazy_output_requirement(); + EXPECT_TRUE(struct_iterator.has_lazy_read_target()); + EXPECT_TRUE(struct_iterator.need_to_read()); + + auto array_item_iterator = + std::make_unique(std::make_shared()); + auto array_offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto array_null_iter = std::make_unique(std::make_shared()); + ArrayFileColumnIterator array_iterator( + std::make_shared(), std::move(array_offsets_iter), + std::move(array_item_iterator), std::move(array_null_iter)); + array_iterator.set_read_phase(ColumnIterator::ReadPhase::LAZY); + array_iterator.set_read_requirement_self(ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_FALSE(array_iterator.has_lazy_read_target()); + EXPECT_FALSE(array_iterator.need_to_read()); + array_iterator._item_iterator->set_lazy_output_requirement(); + EXPECT_TRUE(array_iterator.has_lazy_read_target()); + EXPECT_TRUE(array_iterator.need_to_read()); + + auto map_null_iter = std::make_unique(std::make_shared()); + auto map_offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto map_key_iter = std::make_unique(std::make_shared()); + auto map_val_iter = std::make_unique(std::make_shared()); + MapFileColumnIterator map_iterator(std::make_shared(), std::move(map_null_iter), + std::move(map_offsets_iter), std::move(map_key_iter), + std::move(map_val_iter)); + map_iterator.set_read_phase(ColumnIterator::ReadPhase::LAZY); + map_iterator.set_read_requirement_self(ColumnIterator::ReadRequirement::PREDICATE); + 
EXPECT_FALSE(map_iterator.has_lazy_read_target()); + EXPECT_FALSE(map_iterator.need_to_read()); + map_iterator._val_iterator->set_lazy_output_requirement(); + EXPECT_TRUE(map_iterator.has_lazy_read_target()); + EXPECT_TRUE(map_iterator.need_to_read()); +} + +TEST_F(ColumnReaderTest, NestedReadPhaseLazyOutputMatrix) { + auto build_nested_iterator = []() { + auto make_value_struct = []() { + auto null_iter = std::make_unique(std::make_shared()); + std::vector sub_iters; + auto sub_a = std::make_unique(std::make_shared()); + sub_a->set_column_name("a"); + auto sub_b = std::make_unique(std::make_shared()); + sub_b->set_column_name("b"); + sub_iters.emplace_back(std::move(sub_a)); + sub_iters.emplace_back(std::move(sub_b)); + + auto value_struct = std::make_unique( + std::make_shared(), std::move(null_iter), std::move(sub_iters)); + value_struct->set_column_name("value"); + return value_struct; + }; + + auto map_null_iter = std::make_unique(std::make_shared()); + auto map_offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto map_key_iter = std::make_unique(std::make_shared()); + map_key_iter->set_column_name("key"); + auto map_val_iter = make_value_struct(); + auto map_iterator = std::make_unique( + std::make_shared(), std::move(map_null_iter), + std::move(map_offsets_iter), std::move(map_key_iter), std::move(map_val_iter)); + map_iterator->set_column_name("item"); + + auto array_null_iter = + std::make_unique(std::make_shared()); + auto array_offsets_iter = std::make_unique( + std::make_unique(std::make_shared())); + auto array_iterator = std::make_unique( + std::make_shared(), std::move(array_offsets_iter), + std::move(map_iterator), std::move(array_null_iter)); + array_iterator->set_column_name("col2"); + + auto struct_null_iter = + std::make_unique(std::make_shared()); + std::vector struct_sub_iters; + auto sub_col1 = std::make_unique(std::make_shared()); + sub_col1->set_column_name("col1"); + 
struct_sub_iters.emplace_back(std::move(sub_col1)); + struct_sub_iters.emplace_back(std::move(array_iterator)); + auto top_struct = std::make_unique( + std::make_shared(), std::move(struct_null_iter), + std::move(struct_sub_iters)); + top_struct->set_column_name("root"); + return top_struct; + }; + + auto assert_need_to_read = [](StructFileColumnIterator* top_struct) { + auto* array_iter = + static_cast(top_struct->_sub_column_iterators[1].get()); + auto* map_iter = static_cast(array_iter->_item_iterator.get()); + auto* value_struct = static_cast(map_iter->_val_iterator.get()); + auto expect_scalar = [](ColumnIterator::ReadRequirement requirement, + ColumnIterator::ReadPhase mode) { + switch (mode) { + case ColumnIterator::ReadPhase::NORMAL: + return requirement != ColumnIterator::ReadRequirement::SKIP; + case ColumnIterator::ReadPhase::PREDICATE: + return requirement == ColumnIterator::ReadRequirement::PREDICATE; + case ColumnIterator::ReadPhase::LAZY: + return requirement == ColumnIterator::ReadRequirement::LAZY_OUTPUT; + default: + return false; + } + }; + auto expect_nested = [](ColumnIterator::ReadRequirement requirement, + ColumnIterator::ReadPhase mode) { + switch (mode) { + case ColumnIterator::ReadPhase::NORMAL: + return requirement != ColumnIterator::ReadRequirement::SKIP; + case ColumnIterator::ReadPhase::PREDICATE: + return requirement == ColumnIterator::ReadRequirement::PREDICATE; + default: + return false; + } + }; + + top_struct->set_read_phase(ColumnIterator::ReadPhase::NORMAL); + EXPECT_EQ(expect_nested(top_struct->read_requirement(), ColumnIterator::ReadPhase::NORMAL), + top_struct->need_to_read()); + EXPECT_EQ(expect_nested(array_iter->read_requirement(), ColumnIterator::ReadPhase::NORMAL), + array_iter->need_to_read()); + EXPECT_EQ(expect_nested(map_iter->read_requirement(), ColumnIterator::ReadPhase::NORMAL), + map_iter->need_to_read()); + EXPECT_EQ( + expect_nested(value_struct->read_requirement(), ColumnIterator::ReadPhase::NORMAL), + 
value_struct->need_to_read()); + EXPECT_EQ(expect_scalar(map_iter->_key_iterator->read_requirement(), + ColumnIterator::ReadPhase::NORMAL), + map_iter->_key_iterator->need_to_read()); + EXPECT_EQ(expect_nested(map_iter->_val_iterator->read_requirement(), + ColumnIterator::ReadPhase::NORMAL), + map_iter->_val_iterator->need_to_read()); + + top_struct->set_read_phase(ColumnIterator::ReadPhase::PREDICATE); + EXPECT_EQ( + expect_nested(top_struct->read_requirement(), ColumnIterator::ReadPhase::PREDICATE), + top_struct->need_to_read()); + EXPECT_EQ( + expect_nested(array_iter->read_requirement(), ColumnIterator::ReadPhase::PREDICATE), + array_iter->need_to_read()); + EXPECT_EQ(expect_nested(map_iter->read_requirement(), ColumnIterator::ReadPhase::PREDICATE), + map_iter->need_to_read()); + EXPECT_EQ(expect_nested(value_struct->read_requirement(), + ColumnIterator::ReadPhase::PREDICATE), + value_struct->need_to_read()); + EXPECT_EQ(expect_scalar(map_iter->_key_iterator->read_requirement(), + ColumnIterator::ReadPhase::PREDICATE), + map_iter->_key_iterator->need_to_read()); + EXPECT_EQ(expect_nested(map_iter->_val_iterator->read_requirement(), + ColumnIterator::ReadPhase::PREDICATE), + map_iter->_val_iterator->need_to_read()); + + top_struct->set_read_phase(ColumnIterator::ReadPhase::LAZY); + EXPECT_EQ(top_struct->has_lazy_read_target(), top_struct->need_to_read()); + EXPECT_EQ(array_iter->has_lazy_read_target(), array_iter->need_to_read()); + EXPECT_EQ(map_iter->has_lazy_read_target(), map_iter->need_to_read()); + EXPECT_EQ(value_struct->has_lazy_read_target(), value_struct->need_to_read()); + EXPECT_EQ(expect_scalar(map_iter->_key_iterator->read_requirement(), + ColumnIterator::ReadPhase::LAZY), + map_iter->_key_iterator->need_to_read()); + EXPECT_EQ(map_iter->_val_iterator->has_lazy_read_target(), + map_iter->_val_iterator->need_to_read()); + }; + + { + auto top_struct = build_nested_iterator(); + TColumnAccessPaths all_access_paths; + all_access_paths.emplace_back(); + 
all_access_paths[0].data_access_path.path = {"root", "col2", "*", "VALUES", "a"}; + TColumnAccessPaths predicate_access_paths = all_access_paths; + + auto st = top_struct->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string(); + assert_need_to_read(top_struct.get()); + } + + { + auto top_struct = build_nested_iterator(); + TColumnAccessPaths all_access_paths; + all_access_paths.emplace_back(); + all_access_paths[0].data_access_path.path = {"root", "col2", "*", "KEYS"}; + TColumnAccessPaths predicate_access_paths; + + auto st = top_struct->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string(); + assert_need_to_read(top_struct.get()); + } + + { + auto top_struct = build_nested_iterator(); + TColumnAccessPaths all_access_paths; + all_access_paths.emplace_back(); + all_access_paths[0].data_access_path.path = {"root", "col2", "*"}; + TColumnAccessPaths predicate_access_paths; + predicate_access_paths.emplace_back(); + predicate_access_paths[0].data_access_path.path = {"root", "col2", "*", "VALUES"}; + + auto st = top_struct->set_access_paths(all_access_paths, predicate_access_paths); + ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string(); + assert_need_to_read(top_struct.get()); + } + + { + auto top_struct = build_nested_iterator(); + TColumnAccessPaths all_access_paths; + all_access_paths.emplace_back(); + all_access_paths[0].data_access_path.path = {"root", "col2", "*", "VALUES"}; + TColumnAccessPaths predicate_access_paths; + predicate_access_paths.emplace_back(); + predicate_access_paths[0].data_access_path.path = {"root", "col2", "*", "KEYS"}; + + auto st = top_struct->set_access_paths(all_access_paths, predicate_access_paths); + EXPECT_TRUE(st.ok()); + } } TEST_F(ColumnReaderTest, MultiAccessPaths) { @@ -202,20 +1986,318 @@ TEST_F(ColumnReaderTest, MultiAccessPaths) { auto st = 
iterator->set_access_paths(all_access_paths, predicate_access_paths); ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string(); - ASSERT_EQ(iterator->_reading_flag, ColumnIterator::ReadingFlag::NEED_TO_READ); + ASSERT_EQ(iterator->_read_requirement, ColumnIterator::ReadRequirement::LAZY_OUTPUT); - ASSERT_EQ(iterator->_sub_column_iterators[0]->_reading_flag, - ColumnIterator::ReadingFlag::SKIP_READING); - ASSERT_EQ(iterator->_sub_column_iterators[1]->_reading_flag, - ColumnIterator::ReadingFlag::NEED_TO_READ); + ASSERT_EQ(iterator->_sub_column_iterators[0]->_read_requirement, + ColumnIterator::ReadRequirement::SKIP); + ASSERT_EQ(iterator->_sub_column_iterators[1]->_read_requirement, + ColumnIterator::ReadRequirement::LAZY_OUTPUT); auto* array_iter = static_cast(iterator->_sub_column_iterators[1].get()); - ASSERT_EQ(array_iter->_item_iterator->_reading_flag, ColumnIterator::ReadingFlag::NEED_TO_READ); + ASSERT_EQ(array_iter->_item_iterator->_read_requirement, + ColumnIterator::ReadRequirement::LAZY_OUTPUT); auto* map_iter = static_cast(array_iter->_item_iterator.get()); - ASSERT_EQ(map_iter->_key_iterator->_reading_flag, ColumnIterator::ReadingFlag::NEED_TO_READ); - ASSERT_EQ(map_iter->_val_iterator->_reading_flag, ColumnIterator::ReadingFlag::SKIP_READING); + ASSERT_EQ(map_iter->_key_iterator->_read_requirement, + ColumnIterator::ReadRequirement::LAZY_OUTPUT); + ASSERT_EQ(map_iter->_val_iterator->_read_requirement, ColumnIterator::ReadRequirement::SKIP); +} + +TEST_F(ColumnReaderTest, StructNextBatchAndReadByRowidsUseSequentialChildReads) { + std::vector sub_column_iterators; + auto first_child = std::make_unique(); + auto* first_child_ptr = first_child.get(); + auto second_child = std::make_unique(); + auto* second_child_ptr = second_child.get(); + sub_column_iterators.emplace_back(std::move(first_child)); + sub_column_iterators.emplace_back(std::move(second_child)); + + StructFileColumnIterator struct_iterator(create_test_reader(), nullptr, + 
std::move(sub_column_iterators)); + + MutableColumnPtr dst = create_int_struct_column(2); + size_t rows = 3; + bool has_null = false; + auto st = struct_iterator.next_batch(&rows, dst, &has_null); + ASSERT_TRUE(st.ok()) << "struct next_batch failed: " << st.to_string(); + EXPECT_EQ(3, rows); + EXPECT_EQ(3, dst->size()); + EXPECT_THAT(first_child_ptr->next_batch_sizes, ::testing::ElementsAre(3)); + EXPECT_THAT(second_child_ptr->next_batch_sizes, ::testing::ElementsAre(3)); + + first_child_ptr->clear_tracking(); + second_child_ptr->clear_tracking(); + + const rowid_t rowids[] = {0, 1, 4, 5, 6}; + st = struct_iterator.read_by_rowids(rowids, std::size(rowids), dst); + ASSERT_TRUE(st.ok()) << "struct read_by_rowids failed: " << st.to_string(); + EXPECT_EQ(8, dst->size()); + EXPECT_THAT(first_child_ptr->seek_ordinals, ::testing::ElementsAre(0, 4)); + EXPECT_THAT(second_child_ptr->seek_ordinals, ::testing::ElementsAre(0, 4)); + EXPECT_THAT(first_child_ptr->next_batch_sizes, ::testing::ElementsAre(2, 3)); + EXPECT_THAT(second_child_ptr->next_batch_sizes, ::testing::ElementsAre(2, 3)); + EXPECT_TRUE(first_child_ptr->read_by_rowids_batches.empty()); + EXPECT_TRUE(second_child_ptr->read_by_rowids_batches.empty()); +} + +TEST_F(ColumnReaderTest, StructNullMapOnlyNextBatchSkipsSubColumns) { + auto null_iterator = std::make_unique(); + auto* null_iterator_ptr = null_iterator.get(); + std::vector sub_column_iterators; + auto child_iterator = std::make_unique(); + auto* child_iterator_ptr = child_iterator.get(); + child_iterator->set_column_name("field"); + sub_column_iterators.emplace_back(std::move(child_iterator)); + + StructFileColumnIterator struct_iterator(create_test_reader(true), std::move(null_iterator), + std::move(sub_column_iterators)); + struct_iterator.set_column_name("s"); + + TColumnAccessPaths null_path {create_access_path({"s", ColumnIterator::ACCESS_NULL})}; + auto st = struct_iterator.set_access_paths(null_path, null_path); + ASSERT_TRUE(st.ok()) << 
"set_access_paths failed: " << st.to_string(); + EXPECT_TRUE(struct_iterator.read_null_map_only()); + EXPECT_EQ(child_iterator_ptr->read_requirement(), ColumnIterator::ReadRequirement::SKIP); + + MutableColumnPtr dst = create_nullable_int_struct_column(1); + size_t rows = 2; + bool has_null = false; + st = struct_iterator.next_batch(&rows, dst, &has_null); + ASSERT_TRUE(st.ok()) << "struct null-map-only next_batch failed: " << st.to_string(); + EXPECT_TRUE(has_null); + EXPECT_EQ(2, dst->size()); + EXPECT_THAT(null_iterator_ptr->next_batch_sizes, ::testing::ElementsAre(2)); + EXPECT_TRUE(child_iterator_ptr->next_batch_sizes.empty()); + + const auto& nullable_column = assert_cast(*dst); + EXPECT_EQ(2, nullable_column.get_null_map_column().size()); + const auto& nested_struct = assert_cast( + nullable_column.get_nested_column()); + EXPECT_EQ(2, nested_struct.get_column(0).size()); +} + +TEST_F(ColumnReaderTest, ArrayNullMapOnlyNextBatchAndReadByRowidsSkipItems) { + auto null_iterator = std::make_unique(); + auto* null_iterator_ptr = null_iterator.get(); + auto item_iterator = std::make_unique(); + auto* item_iterator_ptr = item_iterator.get(); + auto offset_iterator = create_tracking_offset_iterator(); + + ArrayFileColumnIterator array_iterator(create_test_reader(true), + std::move(offset_iterator.iterator), + std::move(item_iterator), std::move(null_iterator)); + array_iterator.set_column_name("a"); + + TColumnAccessPaths null_path {create_access_path({"a", ColumnIterator::ACCESS_NULL})}; + auto st = array_iterator.set_access_paths(null_path, null_path); + ASSERT_TRUE(st.ok()) << "set_access_paths failed: " << st.to_string(); + EXPECT_TRUE(array_iterator.read_null_map_only()); + EXPECT_EQ(item_iterator_ptr->read_requirement(), ColumnIterator::ReadRequirement::SKIP); + + MutableColumnPtr dst = create_nullable_int_array_column(); + size_t rows = 3; + bool has_null = false; + st = array_iterator.next_batch(&rows, dst, &has_null); + ASSERT_TRUE(st.ok()) << "array 
null-map-only next_batch failed: " << st.to_string(); + EXPECT_TRUE(has_null); + EXPECT_EQ(3, dst->size()); + EXPECT_THAT(null_iterator_ptr->next_batch_sizes, ::testing::ElementsAre(3)); + EXPECT_TRUE(item_iterator_ptr->next_batch_sizes.empty()); + EXPECT_TRUE(offset_iterator.tracker->next_batch_sizes.empty()); + + null_iterator_ptr->clear_tracking(); + item_iterator_ptr->clear_tracking(); + + const rowid_t rowids[] = {1, 3}; + st = array_iterator.read_by_rowids(rowids, std::size(rowids), dst); + ASSERT_TRUE(st.ok()) << "array null-map-only read_by_rowids failed: " << st.to_string(); + EXPECT_EQ(5, dst->size()); + EXPECT_THAT(null_iterator_ptr->seek_ordinals, ::testing::ElementsAre(1, 3)); + EXPECT_THAT(null_iterator_ptr->next_batch_sizes, ::testing::ElementsAre(1, 1)); + EXPECT_TRUE(item_iterator_ptr->next_batch_sizes.empty()); + EXPECT_TRUE(offset_iterator.tracker->next_batch_sizes.empty()); +} + +TEST_F(ColumnReaderTest, MapNullMapOnlyNextBatchAndReadByRowidsSkipKeysAndValues) { + auto null_iterator = std::make_unique(); + auto* null_iterator_ptr = null_iterator.get(); + auto key_iterator = std::make_unique(); + auto* key_iterator_ptr = key_iterator.get(); + auto value_iterator = std::make_unique(); + auto* value_iterator_ptr = value_iterator.get(); + auto offset_iterator = create_tracking_offset_iterator(); + + MapFileColumnIterator map_iterator(create_test_reader(true, 4), std::move(null_iterator), + std::move(offset_iterator.iterator), std::move(key_iterator), + std::move(value_iterator)); + map_iterator.set_column_name("m"); + + TColumnAccessPaths null_path {create_access_path({"m", ColumnIterator::ACCESS_NULL})}; + auto st = map_iterator.set_access_paths(null_path, null_path); + ASSERT_TRUE(st.ok()) << "set_access_paths failed: " << st.to_string(); + EXPECT_TRUE(map_iterator.read_null_map_only()); + EXPECT_EQ(key_iterator_ptr->read_requirement(), ColumnIterator::ReadRequirement::SKIP); + EXPECT_EQ(value_iterator_ptr->read_requirement(), 
ColumnIterator::ReadRequirement::SKIP); + + MutableColumnPtr dst = create_nullable_int_map_column(); + size_t rows = 3; + bool has_null = false; + st = map_iterator.next_batch(&rows, dst, &has_null); + ASSERT_TRUE(st.ok()) << "map null-map-only next_batch failed: " << st.to_string(); + EXPECT_TRUE(has_null); + EXPECT_EQ(3, dst->size()); + EXPECT_THAT(null_iterator_ptr->next_batch_sizes, ::testing::ElementsAre(3)); + EXPECT_TRUE(key_iterator_ptr->next_batch_sizes.empty()); + EXPECT_TRUE(value_iterator_ptr->next_batch_sizes.empty()); + EXPECT_TRUE(offset_iterator.tracker->next_batch_sizes.empty()); + + null_iterator_ptr->clear_tracking(); + key_iterator_ptr->clear_tracking(); + value_iterator_ptr->clear_tracking(); + + const rowid_t rowids[] = {1, 3}; + st = map_iterator.read_by_rowids(rowids, std::size(rowids), dst); + ASSERT_TRUE(st.ok()) << "map null-map-only read_by_rowids failed: " << st.to_string(); + EXPECT_EQ(5, dst->size()); + ASSERT_EQ(1, null_iterator_ptr->read_by_rowids_batches.size()); + EXPECT_THAT(null_iterator_ptr->read_by_rowids_batches[0], ::testing::ElementsAre(1, 3)); + EXPECT_TRUE(key_iterator_ptr->next_batch_sizes.empty()); + EXPECT_TRUE(value_iterator_ptr->next_batch_sizes.empty()); + EXPECT_TRUE(offset_iterator.tracker->next_batch_sizes.empty()); +} + +TEST_F(ColumnReaderTest, CollectPrefetchersHonorsNestedReadRequirements) { + auto null_iterator = std::make_unique(); + auto* null_iterator_ptr = null_iterator.get(); + std::vector sub_column_iterators; + auto predicate_child = std::make_unique(); + auto* predicate_child_ptr = predicate_child.get(); + auto lazy_child = std::make_unique(); + auto* lazy_child_ptr = lazy_child.get(); + sub_column_iterators.emplace_back(std::move(predicate_child)); + sub_column_iterators.emplace_back(std::move(lazy_child)); + + StructFileColumnIterator struct_iterator(create_test_reader(true), std::move(null_iterator), + std::move(sub_column_iterators)); + 
struct_iterator.set_read_requirement_self(ColumnIterator::ReadRequirement::PREDICATE); + predicate_child_ptr->set_read_requirement(ColumnIterator::ReadRequirement::PREDICATE); + lazy_child_ptr->set_read_requirement(ColumnIterator::ReadRequirement::LAZY_OUTPUT); + struct_iterator.set_read_phase(ColumnIterator::ReadPhase::PREDICATE); + + std::map> prefetchers; + struct_iterator.collect_prefetchers(prefetchers, PrefetcherInitMethod::FROM_ROWIDS); + + EXPECT_THAT(null_iterator_ptr->collect_methods, + ::testing::ElementsAre(PrefetcherInitMethod::FROM_ROWIDS)); + EXPECT_THAT(predicate_child_ptr->collect_methods, + ::testing::ElementsAre(PrefetcherInitMethod::FROM_ROWIDS)); + EXPECT_TRUE(lazy_child_ptr->collect_methods.empty()); + EXPECT_THAT(prefetchers[PrefetcherInitMethod::FROM_ROWIDS], + ::testing::ElementsAre(null_iterator_ptr->prefetcher(), + predicate_child_ptr->prefetcher())); + + null_iterator_ptr->clear_tracking(); + predicate_child_ptr->clear_tracking(); + lazy_child_ptr->clear_tracking(); + prefetchers.clear(); + + struct_iterator.set_read_phase(ColumnIterator::ReadPhase::LAZY); + struct_iterator.collect_prefetchers(prefetchers, PrefetcherInitMethod::FROM_ROWIDS); + + EXPECT_THAT(null_iterator_ptr->collect_methods, + ::testing::ElementsAre(PrefetcherInitMethod::FROM_ROWIDS)); + EXPECT_TRUE(predicate_child_ptr->collect_methods.empty()); + EXPECT_THAT(lazy_child_ptr->collect_methods, + ::testing::ElementsAre(PrefetcherInitMethod::FROM_ROWIDS)); +} + +TEST_F(ColumnReaderTest, ArrayAndMapCollectPrefetchersUseAllDataBlocksForNestedData) { + { + auto null_iterator = std::make_unique(); + auto* null_iterator_ptr = null_iterator.get(); + auto item_iterator = std::make_unique(); + auto* item_iterator_ptr = item_iterator.get(); + auto offset_iterator = create_tracking_offset_iterator(); + auto* offset_iterator_ptr = offset_iterator.tracker; + + ArrayFileColumnIterator array_iterator(create_test_reader(true), + std::move(offset_iterator.iterator), + 
std::move(item_iterator), std::move(null_iterator)); + array_iterator.set_read_requirement(ColumnIterator::ReadRequirement::PREDICATE); + array_iterator.set_read_phase(ColumnIterator::ReadPhase::PREDICATE); + + std::map> prefetchers; + array_iterator.collect_prefetchers(prefetchers, PrefetcherInitMethod::FROM_ROWIDS); + + EXPECT_THAT(offset_iterator_ptr->collect_methods, + ::testing::ElementsAre(PrefetcherInitMethod::FROM_ROWIDS)); + EXPECT_THAT(null_iterator_ptr->collect_methods, + ::testing::ElementsAre(PrefetcherInitMethod::FROM_ROWIDS)); + EXPECT_THAT(item_iterator_ptr->collect_methods, + ::testing::ElementsAre(PrefetcherInitMethod::ALL_DATA_BLOCKS)); + EXPECT_THAT(prefetchers[PrefetcherInitMethod::ALL_DATA_BLOCKS], + ::testing::ElementsAre(item_iterator_ptr->prefetcher())); + } + + { + auto null_iterator = std::make_unique(); + auto* null_iterator_ptr = null_iterator.get(); + auto key_iterator = std::make_unique(); + auto* key_iterator_ptr = key_iterator.get(); + auto value_iterator = std::make_unique(); + auto* value_iterator_ptr = value_iterator.get(); + auto offset_iterator = create_tracking_offset_iterator(); + auto* offset_iterator_ptr = offset_iterator.tracker; + + MapFileColumnIterator map_iterator(create_test_reader(true), std::move(null_iterator), + std::move(offset_iterator.iterator), + std::move(key_iterator), std::move(value_iterator)); + map_iterator.set_read_requirement(ColumnIterator::ReadRequirement::PREDICATE); + map_iterator.set_read_phase(ColumnIterator::ReadPhase::PREDICATE); + + std::map> prefetchers; + map_iterator.collect_prefetchers(prefetchers, PrefetcherInitMethod::FROM_ROWIDS); + + EXPECT_THAT(offset_iterator_ptr->collect_methods, + ::testing::ElementsAre(PrefetcherInitMethod::FROM_ROWIDS)); + EXPECT_THAT(null_iterator_ptr->collect_methods, + ::testing::ElementsAre(PrefetcherInitMethod::FROM_ROWIDS)); + EXPECT_THAT(key_iterator_ptr->collect_methods, + ::testing::ElementsAre(PrefetcherInitMethod::ALL_DATA_BLOCKS)); + 
EXPECT_THAT(value_iterator_ptr->collect_methods, + ::testing::ElementsAre(PrefetcherInitMethod::ALL_DATA_BLOCKS)); + EXPECT_THAT(prefetchers[PrefetcherInitMethod::ALL_DATA_BLOCKS], + ::testing::ElementsAre(key_iterator_ptr->prefetcher(), + value_iterator_ptr->prefetcher())); + } +} + +TEST_F(ColumnReaderTest, MapPredicateAccessAllWithOffsetKeepsKeysReadable) { + auto map_reader = create_test_reader(false, 0, FieldType::OLAP_FIELD_TYPE_MAP); + auto key_iter = std::make_unique( + create_test_reader(false, 0, FieldType::OLAP_FIELD_TYPE_STRING)); + key_iter->set_column_name("key"); + auto* key_ptr = key_iter.get(); + auto val_iter = std::make_unique( + create_test_reader(false, 0, FieldType::OLAP_FIELD_TYPE_STRING)); + val_iter->set_column_name("value"); + auto* val_ptr = val_iter.get(); + auto offset_iterator = create_tracking_offset_iterator(); + + MapFileColumnIterator map_iter(map_reader, nullptr, std::move(offset_iterator.iterator), + std::move(key_iter), std::move(val_iter)); + map_iter.set_column_name("map_col"); + + TColumnAccessPaths access_paths {create_access_path( + {"map_col", ColumnIterator::ACCESS_ALL, ColumnIterator::ACCESS_OFFSET})}; + auto st = map_iter.set_access_paths(access_paths, access_paths); + ASSERT_TRUE(st.ok()) << "set_access_paths failed: " << st.to_string(); + + EXPECT_EQ(key_ptr->read_requirement(), ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_FALSE(key_ptr->read_offset_only()); + EXPECT_EQ(val_ptr->read_requirement(), ColumnIterator::ReadRequirement::PREDICATE); + EXPECT_TRUE(val_ptr->read_offset_only()); } TEST_F(ColumnReaderTest, OffsetPeekUsesPageSentinelWhenNoRemaining) { @@ -285,7 +2367,7 @@ TEST_F(ColumnReaderTest, MapReadByRowidsSkipReadingResizesDestination) { MapFileColumnIterator map_iter(map_reader, std::move(null_iter), std::move(offsets_iter), std::move(key_iter), std::move(val_iter)); map_iter.set_column_name("map_col"); - map_iter.set_reading_flag(ColumnIterator::ReadingFlag::SKIP_READING); + 
map_iter.set_read_requirement(ColumnIterator::ReadRequirement::SKIP); // prepare an empty ColumnMap as destination auto keys = ColumnInt32::create(); @@ -328,14 +2410,14 @@ TEST_F(ColumnReaderTest, MapAccessAllWithOffsetDoesNotPropagateOffsetToKey) { auto st = map_iter.set_access_paths(all_access_paths, predicate_access_paths); ASSERT_TRUE(st.ok()) << "set_access_paths failed: " << st.to_string(); - // Key must be fully readable (NEED_TO_READ), NOT in OFFSET_ONLY mode. + // Key must be fully readable (LAZY_OUTPUT), NOT in OFFSET_ONLY mode. auto* key_ptr = static_cast(map_iter._key_iterator.get()); - ASSERT_EQ(key_ptr->_reading_flag, ColumnIterator::ReadingFlag::NEED_TO_READ); + ASSERT_EQ(key_ptr->_read_requirement, ColumnIterator::ReadRequirement::LAZY_OUTPUT); ASSERT_FALSE(key_ptr->read_offset_only()); // Value should be in OFFSET_ONLY mode since we only need string lengths. auto* val_ptr = static_cast(map_iter._val_iterator.get()); - ASSERT_EQ(val_ptr->_reading_flag, ColumnIterator::ReadingFlag::NEED_TO_READ); + ASSERT_EQ(val_ptr->_read_requirement, ColumnIterator::ReadRequirement::LAZY_OUTPUT); ASSERT_TRUE(val_ptr->read_offset_only()); } diff --git a/be/test/storage/segment/segment_iterator_lazy_pruned_test.cpp b/be/test/storage/segment/segment_iterator_lazy_pruned_test.cpp new file mode 100644 index 00000000000000..338804fe5d558e --- /dev/null +++ b/be/test/storage/segment/segment_iterator_lazy_pruned_test.cpp @@ -0,0 +1,187 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include +#include + +#include "common/cast_set.h" +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_number.h" +#include "storage/olap_common.h" +#include "storage/segment/column_reader.h" +#include "storage/tablet/tablet_schema.h" + +// Use #define private public to access SegmentIterator::_read_lazy_pruned_columns() +// and the small amount of state it consumes. This mirrors the existing +// segment_iterator_* white-box tests. +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wkeyword-macro" +#endif +#define private public +#include "storage/segment/segment_iterator.h" +#undef private +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + +namespace doris::segment_v2 { +namespace { + +class TrackingLazyColumnIterator final : public ColumnIterator { +public: + Status seek_to_ordinal(ordinal_t ord) override { + seek_ordinals.push_back(ord); + return Status::OK(); + } + + Status read_by_rowids(const rowid_t* rowids, const size_t count, + MutableColumnPtr& dst) override { + read_phases.push_back(_read_phase); + read_rowids.assign(rowids, rowids + count); + ++read_by_rowids_count; + + auto& int_column = assert_cast&>(*dst); + for (size_t i = 0; i < count; ++i) { + int_column.insert_value(cast_set(rowids[i])); + } + return Status::OK(); + } + + void finalize_lazy_phase(MutableColumnPtr& dst) override { + finalize_phases.push_back(_read_phase); + ++finalize_count; + } + + ordinal_t get_current_ordinal() 
const override { return 0; } + + ReadPhase phase() const { return _read_phase; } + + std::vector seek_ordinals; + std::vector read_rowids; + std::vector read_phases; + std::vector finalize_phases; + int read_by_rowids_count = 0; + int finalize_count = 0; +}; + +TabletSchemaSPtr make_tablet_schema() { + TabletSchemaPB schema_pb; + schema_pb.set_keys_type(KeysType::DUP_KEYS); + auto* col = schema_pb.add_column(); + col->set_unique_id(0); + col->set_name("c0"); + col->set_type("INT"); + col->set_is_key(true); + col->set_is_nullable(false); + + auto tablet_schema = std::make_shared(); + tablet_schema->init_from_pb(schema_pb); + return tablet_schema; +} + +SchemaSPtr make_read_schema(const TabletSchemaSPtr& tablet_schema) { + std::vector read_column_ids(tablet_schema->num_columns()); + for (uint32_t cid = 0; cid < read_column_ids.size(); ++cid) { + read_column_ids[cid] = cid; + } + return std::make_shared(tablet_schema->columns(), read_column_ids); +} + +Block make_int_block() { + Block block; + block.insert({ColumnInt32::create(), std::make_shared(), "c0"}); + return block; +} + +} // namespace + +class SegmentIteratorLazyPrunedTest : public ::testing::Test { +protected: + void SetUp() override { + _tablet_schema = make_tablet_schema(); + _read_schema = make_read_schema(_tablet_schema); + } + + std::unique_ptr make_iter(TrackingLazyColumnIterator** tracking_iter) { + auto iter = std::make_unique(nullptr, _read_schema); + iter->_opts.tablet_schema = _tablet_schema; + iter->_opts.stats = &_stats; + iter->_schema_block_id_map = {0}; + iter->_support_lazy_read_pruned_columns.insert(0); + iter->_column_iterators.resize(1); + + auto column_iter = std::make_unique(); + *tracking_iter = column_iter.get(); + iter->_column_iterators[0] = std::move(column_iter); + return iter; + } + + TabletSchemaSPtr _tablet_schema; + SchemaSPtr _read_schema; + OlapReaderStatistics _stats; +}; + +TEST_F(SegmentIteratorLazyPrunedTest, readsSelectedRowidsInLazyPhaseAndRestoresPhase) { + 
TrackingLazyColumnIterator* tracking_iter = nullptr; + auto iter = make_iter(&tracking_iter); + iter->_selected_size = 2; + iter->_block_rowids = {10, 20, 30, 40}; + iter->_sel_rowid_idx = {2, 0}; + + auto block = make_int_block(); + auto st = iter->_read_lazy_pruned_columns(&block); + ASSERT_TRUE(st.ok()) << st.to_string(); + + EXPECT_EQ(tracking_iter->read_by_rowids_count, 1); + EXPECT_EQ(tracking_iter->finalize_count, 1); + EXPECT_EQ(tracking_iter->read_rowids, (std::vector {30, 10})); + EXPECT_EQ(tracking_iter->read_phases, + (std::vector {ColumnIterator::ReadPhase::LAZY})); + EXPECT_EQ(tracking_iter->finalize_phases, + (std::vector {ColumnIterator::ReadPhase::LAZY})); + EXPECT_EQ(tracking_iter->phase(), ColumnIterator::ReadPhase::NORMAL); + + const auto& result = + assert_cast&>(*block.get_by_position(0).column); + ASSERT_EQ(result.size(), 2); + EXPECT_EQ(result.get_data()[0], 30); + EXPECT_EQ(result.get_data()[1], 10); +} + +TEST_F(SegmentIteratorLazyPrunedTest, emptySelectionStillFinalizesLazyPlaceholders) { + TrackingLazyColumnIterator* tracking_iter = nullptr; + auto iter = make_iter(&tracking_iter); + iter->_selected_size = 0; + + auto block = make_int_block(); + auto st = iter->_read_lazy_pruned_columns(&block); + ASSERT_TRUE(st.ok()) << st.to_string(); + + EXPECT_EQ(tracking_iter->read_by_rowids_count, 0); + EXPECT_EQ(tracking_iter->finalize_count, 1); + EXPECT_EQ(tracking_iter->finalize_phases, + (std::vector {ColumnIterator::ReadPhase::LAZY})); + EXPECT_EQ(tracking_iter->phase(), ColumnIterator::ReadPhase::NORMAL); + EXPECT_EQ(block.get_by_position(0).column->size(), 0); +} + +} // namespace doris::segment_v2 diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java index 9e6807cc0b2905..9dcbfea57dd497 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java @@ -19,11 +19,11 @@ import org.apache.doris.nereids.CascadesContext; import org.apache.doris.nereids.processor.post.TopnFilterPushDownVisitor.PushDownContext; -import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.SortPhase; import org.apache.doris.nereids.trees.plans.algebra.TopN; import org.apache.doris.nereids.trees.plans.physical.PhysicalTopN; +import org.apache.doris.nereids.types.DataType; /** * topN opt @@ -60,13 +60,20 @@ boolean checkTopN(TopN topN) { return false; } - Expression firstKey = topN.getOrderKeys().get(0).getExpr(); + DataType firstKeyType = topN.getOrderKeys().get(0).getExpr().getDataType(); - if (firstKey.getDataType().isFloatType() - || firstKey.getDataType().isDoubleType()) { - return false; - } - return true; + return isSupportedTopNRuntimeFilterType(firstKeyType); + } + + private boolean isSupportedTopNRuntimeFilterType(DataType dataType) { + return dataType.isBooleanType() + || dataType.isIntegralType() + || dataType.isDecimalLikeType() + || dataType.isStringLikeType() + || dataType.isDateLikeType() + || dataType.isTimeType() + || dataType.isIPType() + || dataType.isVarBinaryType(); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MetaPathStriper.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MetaPathStriper.java deleted file mode 100644 index 8f08699ec232d3..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MetaPathStriper.java +++ /dev/null @@ -1,300 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.nereids.rules.rewrite; - -import org.apache.doris.analysis.ColumnAccessPathType; -import org.apache.doris.common.Pair; - -import com.google.common.collect.Multimap; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; - -/** - * Strips redundant metadata-only (NULL/OFFSET) access paths using pure - * string-prefix comparison. Map-level wildcards must already have been - * expanded into {@code KEYS} + {@code VALUES} by the caller. - * - *

Single entry point: {@link #strip(int, Multimap, Multimap)}. - */ -public final class MetaPathStriper { - - private MetaPathStriper() {} - - /** - * Strip redundant metadata-only NULL/OFFSET paths using pure string-prefix - * comparison, keeping enough real paths for BE readers to avoid - * OFFSET_ONLY / NULL_MAP_ONLY modes that skip required child data. - * - *

Stripping is organised in two levels: - * - *

Level 1 — Same-prefix priority: when two paths share the same - * prefix and differ only in the final meta suffix, the higher-priority one - * eliminates the lower. - *

{@code
-     *   Data  >  OFFSET  >  NULL
-     * }
- *
    - *
  • {@code Data} strips {@code OFFSET}: {@code [a]} strips {@code [a, OFFSET]}.
  • - *
  • {@code Data} strips {@code NULL}: {@code [a]} strips {@code [a, NULL]}.
  • - *
  • {@code OFFSET} strips {@code NULL}: {@code [a, OFFSET]} strips - * {@code [a, NULL]}.
  • - *
- * - *

Level 2 — Deeper-prefix coverage: when a covering path goes - * deeper into the type tree, its data reader already materialises the - * container, making a shallower meta-only path redundant. - *

    - *
  • Target suffix {@code OFFSET}, covered by deeper: - *
      - *
    • {@code Data}: {@code [a, *, field]} strips {@code [a, OFFSET]}.
    • - *
    • {@code OFFSET}: {@code [a, *, OFFSET]} strips {@code [a, OFFSET]}.
    • - *
    • {@code NULL}: {@code [a, *, NULL]} strips {@code [a, OFFSET]}.
    • - *
    - *
  • - *
  • Target suffix {@code NULL}, covered by deeper: - *
      - *
    • {@code Data}: {@code [a, b, c]} strips {@code [a, b, NULL]}.
    • - *
    • {@code OFFSET}: {@code [a, *, OFFSET]} strips {@code [a, NULL]}.
    • - *
    • {@code NULL}: {@code [a, *, NULL]} strips {@code [a, NULL]}.
    • - *
    - *
  • - *
- * - *

Pre-condition: map-level {@code *} wildcards must already have been - * expanded into {@code KEYS} + {@code VALUES} by the caller. This class - * uses pure string-prefix comparison and is type-unaware. - */ - public static void strip( - int slotId, - Multimap>> targetAccessPaths, - Multimap>> coveringAccessPaths) { - stripExactPrefixCoveredMetaPaths(slotId, targetAccessPaths, coveringAccessPaths); - stripNullBySamePrefixOffset(slotId, targetAccessPaths); - - stripMetaPathsByDeeperPrefix(slotId, AccessPathInfo.ACCESS_OFFSET, - targetAccessPaths, coveringAccessPaths); - stripMetaPathsByDeeperPrefix(slotId, AccessPathInfo.ACCESS_NULL, - targetAccessPaths, coveringAccessPaths); - } - - // ======================================================================== - // Path helpers - // ======================================================================== - - private static boolean isMetaPath(List path) { - if (path.isEmpty()) { - return false; - } - String lastComponent = path.get(path.size() - 1); - return AccessPathInfo.ACCESS_NULL.equals(lastComponent) - || AccessPathInfo.ACCESS_OFFSET.equals(lastComponent); - } - - private static List> collectPaths( - Collection>> a, - Collection>> b, boolean meta) { - List> result = new ArrayList<>(); - for (Pair> p : a) { - if (!p.second.isEmpty() && isMetaPath(p.second) == meta) { - result.add(p.second); - } - } - for (Pair> p : b) { - if (!p.second.isEmpty() && isMetaPath(p.second) == meta) { - result.add(p.second); - } - } - return result; - } - - private static boolean isPrefixCovered(List prefix, List coveringPath) { - if (coveringPath.isEmpty()) { - return true; - } - int minLen = Math.min(prefix.size(), coveringPath.size()); - for (int i = 0; i < minLen; i++) { - if (!prefix.get(i).equals(coveringPath.get(i))) { - return false; - } - } - return true; - } - - // ======================================================================== - // Level 1 — same-prefix priority - // 
======================================================================== - - /** - * {@code [prefix]} strips {@code [prefix, OFFSET]} and {@code [prefix, NULL]}. - */ - private static void stripExactPrefixCoveredMetaPaths( - int slotId, - Multimap>> targetAccessPaths, - Multimap>> coveringAccessPaths) { - Collection>> targetPaths = targetAccessPaths.get(slotId); - if (targetPaths.isEmpty()) { - return; - } - - List> fullAccessPaths = collectPaths( - coveringAccessPaths.get(slotId), targetPaths, false); - - List>> pathsToRemove = new ArrayList<>(); - for (Pair> p : targetPaths) { - List path = p.second; - if (!isMetaPath(path)) { - continue; - } - List prefix = path.subList(0, path.size() - 1); - for (List fullAccessPath : fullAccessPaths) { - if (isPrefixCovered(prefix, fullAccessPath) - && prefix.size() >= fullAccessPath.size()) { - pathsToRemove.add(p); - break; - } - } - } - targetPaths.removeAll(pathsToRemove); - } - - /** - * {@code [prefix, OFFSET]} strips {@code [prefix, NULL]}. - */ - private static void stripNullBySamePrefixOffset( - int slotId, Multimap>> allAccessPaths) { - Collection>> slotPaths = allAccessPaths.get(slotId); - if (slotPaths.isEmpty()) { - return; - } - - List>> toRemove = new ArrayList<>(); - for (Pair> p : slotPaths) { - List path = p.second; - if (path.isEmpty() || !AccessPathInfo.ACCESS_NULL.equals(path.get(path.size() - 1))) { - continue; - } - List prefix = path.subList(0, path.size() - 1); - for (Pair> q : slotPaths) { - List other = q.second; - if (other == path || other.isEmpty()) { - continue; - } - if (other.size() != path.size() - || !AccessPathInfo.ACCESS_OFFSET.equals(other.get(other.size() - 1))) { - continue; - } - List otherPrefix = other.subList(0, other.size() - 1); - if (isPrefixCovered(prefix, otherPrefix)) { - toRemove.add(p); - break; - } - } - } - slotPaths.removeAll(toRemove); - } - - // ======================================================================== - // Level 2 — deeper-prefix coverage - // 
======================================================================== - - private static void stripMetaPathsByDeeperPrefix( - int slotId, String metaSuffix, - Multimap>> targetAccessPaths, - Multimap>> coveringAccessPaths) { - Collection>> targetPaths = targetAccessPaths.get(slotId); - if (targetPaths.isEmpty()) { - return; - } - Collection>> coveringPaths = - coveringAccessPaths.get(slotId); - - List> dataPaths = collectPaths(coveringPaths, targetPaths, false); - stripMetaByDeeperDataPaths(slotId, targetAccessPaths, dataPaths, metaSuffix); - - List> metaPaths = collectPaths(coveringPaths, targetPaths, true); - stripMetaByDeeperMetaPaths(slotId, metaSuffix, metaPaths, targetAccessPaths); - } - - /** - * Strip target meta paths covered by a data path. - */ - private static void stripMetaByDeeperDataPaths( - int slotId, - Multimap>> targetAccessPaths, - List> dataPaths, String metaSuffix) { - Collection>> targetPaths = - targetAccessPaths.get(slotId); - if (targetPaths.isEmpty() || dataPaths.isEmpty()) { - return; - } - - List>> toRemove = new ArrayList<>(); - for (Pair> p : new ArrayList<>(targetPaths)) { - List path = p.second; - if (path.isEmpty() || !metaSuffix.equals(path.get(path.size() - 1))) { - continue; - } - List prefix = path.subList(0, path.size() - 1); - for (List dataPath : dataPaths) { - if (isPrefixCovered(prefix, dataPath)) { - toRemove.add(p); - break; - } - } - } - targetPaths.removeAll(toRemove); - } - - /** - * Strip target meta paths covered by a strictly deeper meta path. 
- */ - private static void stripMetaByDeeperMetaPaths( - int slotId, String metaSuffix, - List> coveringPaths, - Multimap>> targetAccessPaths) { - Collection>> targetPaths = - targetAccessPaths.get(slotId); - if (targetPaths.isEmpty() || coveringPaths.isEmpty()) { - return; - } - - List>> toRemove = new ArrayList<>(); - for (Pair> p : targetPaths) { - List targetPath = p.second; - if (targetPath.isEmpty() || !metaSuffix.equals(targetPath.get(targetPath.size() - 1))) { - continue; - } - List targetPrefix = targetPath.subList(0, targetPath.size() - 1); - for (List coveringPath : coveringPaths) { - if (coveringPath == targetPath || coveringPath.isEmpty()) { - continue; - } - if (coveringPath.size() - 1 <= targetPath.size() - 1) { - continue; - } - if (isPrefixCovered(targetPrefix, coveringPath)) { - toRemove.add(p); - break; - } - } - } - targetPaths.removeAll(toRemove); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java index 5a3a55d3782a56..086d1b7bae1048 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java @@ -225,12 +225,17 @@ private static Map pruneDataType( for (Entry> kv : slotToAccessPaths.entrySet()) { Slot slot = kv.getKey(); List collectAccessPathResults = kv.getValue(); + boolean hasRegularAccessPath = collectAccessPathResults.stream() + .anyMatch(resultItem -> !isDataSkippingOnlyAccessPath(resultItem.getPath())); if (slot.getDataType() instanceof VariantType) { variantSlots.put(slot, slot.getDataType()); for (CollectAccessPathResult collectAccessPathResult : collectAccessPathResults) { List path = collectAccessPathResult.getPath(); ColumnAccessPathType pathType = collectAccessPathResult.getType(); - allAccessPaths.put(slot.getExprId().asInt(), Pair.of(pathType, 
path)); + Pair> allPath = + normalizePredicateMetaPathForAllAccessPath( + slot, collectAccessPathResult, hasRegularAccessPath); + allAccessPaths.put(slot.getExprId().asInt(), allPath); if (collectAccessPathResult.isPredicate()) { predicateAccessPaths.put( slot.getExprId().asInt(), Pair.of(pathType, path) @@ -242,11 +247,14 @@ private static Map pruneDataType( for (CollectAccessPathResult collectAccessPathResult : collectAccessPathResults) { List path = collectAccessPathResult.getPath(); ColumnAccessPathType pathType = collectAccessPathResult.getType(); + Pair> allPath = + normalizePredicateMetaPathForAllAccessPath( + slot, collectAccessPathResult, hasRegularAccessPath); DataTypeAccessTree allAccessTree = slotIdToAllAccessTree.computeIfAbsent( - slot, i -> DataTypeAccessTree.ofRoot(slot, pathType) + slot, i -> DataTypeAccessTree.ofRoot(slot, allPath.first) ); - allAccessTree.setAccessByPath(path, 0, pathType); - allAccessPaths.put(slot.getExprId().asInt(), Pair.of(pathType, path)); + allAccessTree.setAccessByPath(allPath.second, 0, allPath.first); + allAccessPaths.put(slot.getExprId().asInt(), allPath); if (collectAccessPathResult.isPredicate()) { DataTypeAccessTree predicateAccessTree = slotIdToPredicateAccessTree.computeIfAbsent( @@ -260,21 +268,21 @@ private static Map pruneDataType( } } - // phase 1.5: for slots with meta paths, expand map-star paths and strip - // redundant meta paths. Strip predicate first using the COMPLETE - // allAccessPaths as covering, then strip allAccessPaths self-covering. + // phase 1.5: for slots with meta paths, expand map-star paths before building final + // access path lists. Predicate NULL/OFFSET paths are kept in predicateAccessPaths. + // When regular data paths also exist, allAccessPaths uses the stripped data path for + // mixed-version safety with older BEs that only understand allAccessPaths, except + // map-star predicate metadata paths. 
Those paths must stay unstripped until this phase + // so map.*.OFFSET can become map.KEYS + map.VALUES.OFFSET instead of broad map.*. for (Entry kv : slotIdToAllAccessTree.entrySet()) { Slot slot = kv.getKey(); DataTypeAccessTree accessTree = kv.getValue(); if (!accessTree.hasOffsetPath() && !accessTree.hasNullPath()) { continue; } - int slotId = slot.getExprId().asInt(); // Expand both sets before stripping so covering is complete. expandMapStarPaths(slot, allAccessPaths); expandMapStarPaths(slot, predicateAccessPaths); - MetaPathStriper.strip(slotId, predicateAccessPaths, allAccessPaths); - MetaPathStriper.strip(slotId, allAccessPaths, allAccessPaths); } // second: build non-predicate access paths @@ -304,7 +312,7 @@ private static Map pruneDataType( buildColumnAccessPaths(slot, predicateAccessPaths); AccessPathInfo accessPathInfo = result.get(slot.getExprId().asInt()); if (accessPathInfo != null) { - retainPredicatePathsInFinalAllAccessPaths( + addPredicatePathsToFinalAllAccessPaths( predicatePaths, accessPathInfo.getAllAccessPaths()); accessPathInfo.getPredicateAccessPaths().addAll(predicatePaths); } @@ -316,7 +324,7 @@ private static Map pruneDataType( buildColumnAccessPaths(slot, predicateAccessPaths); AccessPathInfo accessPathInfo = result.get(slot.getExprId().asInt()); if (accessPathInfo != null) { - retainPredicatePathsInFinalAllAccessPaths( + addPredicatePathsToFinalAllAccessPaths( predicatePaths, accessPathInfo.getAllAccessPaths()); accessPathInfo.getPredicateAccessPaths().addAll(predicatePaths); } @@ -325,33 +333,86 @@ private static Map pruneDataType( return result; } + private static Pair> normalizePredicateMetaPathForAllAccessPath( + Slot slot, CollectAccessPathResult accessPathResult, boolean hasRegularAccessPath) { + List path = accessPathResult.getPath(); + ColumnAccessPathType pathType = accessPathResult.getType(); + if (accessPathResult.isPredicate() && hasRegularAccessPath + && isDataSkippingOnlyAccessPath(path)) { + if 
(hasMapStarAccessPath(slot, path)) { + // Keep map-star metadata until expandMapStarPaths() turns it into precise + // KEYS/VALUES paths. Stripping here would broaden map value reads to map.*. + return Pair.of(pathType, path); + } + return Pair.of(ColumnAccessPathType.DATA, stripDataSkippingSuffix(path)); + } + return Pair.of(pathType, path); + } + + private static boolean hasMapStarAccessPath(Slot slot, List path) { + List positions = new ArrayList<>(); + findMapStarPositions(path, slot.getDataType(), positions); + return !positions.isEmpty(); + } + /** - * Keep predicate access paths as a subset of final all access paths after NULL/OFFSET cleanup. - * Predicate paths are built from filter expressions first, but later all-path rewrites may drop - * metadata-only paths or collapse paths to whole-column access. Any predicate path not present - * in final all paths must be removed before sending access info to BE. + * Keep final allAccessPaths as a superset of non-metadata predicateAccessPaths. Predicate + * paths are collected from filter expressions first, but final all-path construction may later + * collapse ordinary paths to whole-column access. BE complex readers use allAccessPaths to + * decide which data sub-iterators can be pruned; any predicate data path missing from + * allAccessPaths can therefore make predicate reads disagree with pruning. * - *

Examples: - *

    - *
  • All paths {@code [s]}, predicate paths {@code [s.city.NULL]} becomes no predicate - * paths after parent NULL removal.
  • - *
  • All paths {@code [s.city.NULL, s.zip]}, predicate paths - * {@code [s.NULL, s.city.NULL]} becomes {@code [s.city.NULL]}.
  • - *
+ * Keep predicate NULL/OFFSET paths out of allAccessPaths for mixed-version safety. Older BEs + * decide current-level meta-only mode from allAccessPaths only, so sending both metadata and + * data paths there could make them skip required child data. Newer BEs still receive those + * metadata predicates through predicateAccessPaths. */ - private static void retainPredicatePathsInFinalAllAccessPaths( + private static void addPredicatePathsToFinalAllAccessPaths( List predicatePaths, List allPaths) { - if (predicatePaths.isEmpty()) { - return; + for (ColumnAccessPath predicatePath : predicatePaths) { + if (!isMetaPath(predicatePath) && !isCoveredByAllPath(predicatePath, allPaths)) { + allPaths.add(predicatePath); + } } + } - List toRemove = new ArrayList<>(); - for (ColumnAccessPath predicatePath : predicatePaths) { - if (!allPaths.contains(predicatePath)) { - toRemove.add(predicatePath); + private static boolean isCoveredByAllPath(ColumnAccessPath predicatePath, List allPaths) { + for (ColumnAccessPath allPath : allPaths) { + if (allPath.getType() == predicatePath.getType() + && isPrefixPath(allPath.getPath(), predicatePath.getPath())) { + return true; } } - predicatePaths.removeAll(toRemove); + return false; + } + + private static boolean isPrefixPath(List prefix, List path) { + if (prefix.size() > path.size()) { + return false; + } + for (int i = 0; i < prefix.size(); ++i) { + if (!prefix.get(i).equals(path.get(i))) { + return false; + } + } + return true; + } + + private static boolean isMetaPath(ColumnAccessPath path) { + return isDataSkippingOnlyAccessPath(path.getPath()); + } + + private static boolean isDataSkippingOnlyAccessPath(List components) { + if (components.isEmpty()) { + return false; + } + String lastComponent = components.get(components.size() - 1); + return AccessPathInfo.ACCESS_NULL.equals(lastComponent) + || AccessPathInfo.ACCESS_OFFSET.equals(lastComponent); + } + + private static List stripDataSkippingSuffix(List components) { + return new 
ArrayList<>(components.subList(0, components.size() - 1)); } private static List buildColumnAccessPaths( diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 2870c3443fefd8..05ef1b9bda8944 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -5644,6 +5644,8 @@ public TQueryOptions toThrift() { tResult.setReadCsvEmptyLineAsNull(readCsvEmptyLineAsNull); tResult.setSerdeDialect(getSerdeDialect()); + tResult.setEnablePruneNestedColumn(enablePruneNestedColumns); + tResult.setEnableMatchWithoutInvertedIndex(enableMatchWithoutInvertedIndex); tResult.setEnableFallbackOnMissingInvertedIndex(enableFallbackOnMissingInvertedIndex); tResult.setEnableInvertedIndexSearcherCache(enableInvertedIndexSearcherCache); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/postprocess/TopNRuntimeFilterTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/postprocess/TopNRuntimeFilterTest.java index f66a284121ca58..84868c93dd7226 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/postprocess/TopNRuntimeFilterTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/postprocess/TopNRuntimeFilterTest.java @@ -42,6 +42,7 @@ import org.junit.jupiter.api.Test; import org.mockito.Mockito; +import java.util.List; import java.util.Map; public class TopNRuntimeFilterTest extends SSBTestBase implements MemoPatternMatchSupported { @@ -125,6 +126,25 @@ public void testNotUseTopNRfOnWindow() { Assertions.assertFalse(checker.getCascadesContext().getTopnFilterContext().isTopnFilterSource(localTopN)); } + @Test + public void testNotUseTopNRfForUnsupportedComplexOrderKey() { + String sql = "select c_custkey from customer order by array(c_custkey) limit 5"; + PlanChecker checker = PlanChecker.from(connectContext).analyze(sql) + .rewrite() + .implement(); + PhysicalPlan 
plan = checker.getPhysicalPlan(); + plan = new PlanPostProcessors(checker.getCascadesContext()).process(plan); + + List> localTopNs = plan.collectToList( + node -> node instanceof PhysicalTopN + && ((PhysicalTopN) node).getSortPhase().isLocal()); + Assertions.assertFalse(localTopNs.isEmpty(), plan.treeString()); + for (PhysicalTopN localTopN : localTopNs) { + Assertions.assertFalse( + checker.getCascadesContext().getTopnFilterContext().isTopnFilterSource(localTopN)); + } + } + @Test public void testProbeExprNullableThroughRightOuterJoin() { // topn node push down filter value to scan node. diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java index 58411582aacbed..efbe72a75acecd 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java @@ -166,31 +166,11 @@ public void testMap() throws Exception { public void testMapElementLengthWithMapValuesKeepsKeysPath() throws Exception { assertColumn("select length(map_col['a']), map_values(map_col)[1] from str_tbl", "map", - ImmutableList.of(path("map_col", "KEYS"), path("map_col", "VALUES")), + ImmutableList.of(path("map_col", "KEYS"), path("map_col", "VALUES"), path("map_col", "VALUES", "OFFSET")), ImmutableList.of() ); } - @Test - public void testStructRootArrayMixedAccessSuppressesOffsetPath() throws Exception { - assertAllAccessPathsContain( - "select cardinality(element_at(s, 'arr')), " - + "element_at(element_at(element_at(s, 'arr'), 1), 'int_field') " - + "from nested_container_tbl", - ImmutableList.of(path("s", "arr", "*", "int_field")), - ImmutableList.of(path("s", "arr", "OFFSET"))); - } - - @Test - public void testStructRootMapMixedAccessKeepsKeysPath() throws Exception { - assertAllAccessPathsContain( - "select 
length(element_at(element_at(s, 'm'), 'a')), " - + "element_at(map_values(element_at(s, 'm')), 1) " - + "from nested_container_tbl", - ImmutableList.of(path("s", "m", "KEYS"), path("s", "m", "VALUES")), - ImmutableList.of(path("s", "m", "*", "OFFSET"), path("s", "m", "VALUES", "OFFSET"))); - } - @Test public void testCardinalityArrayElementKeepsOffsetPath() throws Exception { assertAllAccessPathsContain( @@ -199,53 +179,6 @@ public void testCardinalityArrayElementKeepsOffsetPath() throws Exception { ImmutableList.of(path("a", "*"))); } - @Test - public void testDeeperOffsetPathCoversShallowerOffsetPath() throws Exception { - // cardinality(a) on ARRAY> generates [a, OFFSET] - // cardinality(element_at(a, 1)) generates [a, *, OFFSET] - // [a, *, OFFSET] traverses into the outer array's items, so it must read - // through the outer array structure. The shallower [a, OFFSET] is therefore - // redundant and should be stripped. - assertAllAccessPathsContain( - "select cardinality(a), cardinality(element_at(a, 1)) from nested_array_tbl", - ImmutableList.of(path("a", "*", "OFFSET")), - ImmutableList.of(path("a", "OFFSET"))); - } - - @Test - public void testOffsetPathCoversNullPathWithSamePrefix() throws Exception { - assertAllAccessPathsContain( - "select cardinality(a), a is null from nested_array_tbl", - ImmutableList.of(path("a", "OFFSET")), - ImmutableList.of(path("a", "NULL"))); - } - - @Test - public void testDeeperNullPathCoversShallowerNullPath() throws Exception { - // a IS NULL on ARRAY> generates [a, NULL] - // element_at(a, 1) IS NULL generates [a, *, NULL] - // [a, *, NULL] traverses into the outer array's items, which requires - // reading the outer array's null bitmap. Therefore [a, NULL] is redundant - // and should be stripped. 
- assertAllAccessPathsContain( - "select a is null, element_at(a, 1) is null from nested_array_tbl", - ImmutableList.of(path("a", "*", "NULL")), - ImmutableList.of(path("a", "NULL"))); - } - - @Test - public void testDeeperOffsetPathCoversShallowerNullPathForArray() throws Exception { - // cardinality(element_at(a, 1)) on ARRAY> generates [a, *, OFFSET] - // a IS NULL generates [a, NULL] - // [a, *, OFFSET] reads the offset of inner-array elements, which requires - // reading through the outer array's null bitmap. Therefore [a, NULL] is - // redundant and should be stripped. - assertAllAccessPathsContain( - "select cardinality(element_at(a, 1)), a is null from nested_array_tbl", - ImmutableList.of(path("a", "*", "OFFSET")), - ImmutableList.of(path("a", "NULL"))); - } - @Test public void testCardinalityMapElementKeepsValueOffsetPath() throws Exception { assertColumn("select cardinality(map_arr_col['a']) from map_array_tbl", @@ -254,114 +187,17 @@ public void testCardinalityMapElementKeepsValueOffsetPath() throws Exception { ImmutableList.of()); } - @Test - public void testDeeperNullCoversValueOffsetForMapArray() throws Exception { - // cardinality(map_arr_col['a']) -> normalized to KEYS + VALUES.OFFSET - // element_at(map_arr_col['a'], 1) IS NULL -> *.*.NULL - // *.*.NULL goes deeper into the value-side array, so VALUES.OFFSET - // at the map value level is redundant. Without type-aware comparison - // * is not lexically equal to VALUES, so VALUES.OFFSET would survive - // and cause BE to skip the item iterator. 
- assertAllAccessPathsContain( - "select cardinality(map_arr_col['a']), " - + "element_at(map_arr_col['a'], 1) is null from map_array_tbl", - ImmutableList.of(path("map_arr_col", "KEYS"), path("map_arr_col", "VALUES", "*", "NULL")), - ImmutableList.of(path("map_arr_col", "VALUES", "OFFSET"))); - } - - @Test - public void testDataPathCoversNullPathWithMapAwareComparison() throws Exception { - // element_at(map_col, 'a') -> [map_col, KEYS] + [map_col, VALUES] - // element_at(map_values(map_col), 1) IS NULL -> [map_col, VALUES, NULL] - // Level 1 strips [map_col, VALUES, NULL] because [map_col, VALUES] covers it. - // - // NOT: map_values(map_col) IS NULL — visitMapValues special-cases a lone NULL - // suffix as a parent-map null check (isFunctionNullCheckPath), producing - // [map_col, NULL] instead of [map_col, VALUES, NULL]. - assertAllAccessPathsContain( - "select element_at(map_col, 'a') from str_tbl" - + " where element_at(map_values(map_col), 1) is null", - ImmutableList.of(path("map_col", "KEYS"), path("map_col", "VALUES")), - ImmutableList.of(path("map_col", "VALUES", "NULL"))); - } - - @Test - public void testDataPathCoversOffsetPathWithMapAwareComparison() throws Exception { - // element_at(map_col, 'a') -> [map_col, KEYS] + [map_col, VALUES] - // length(element_at(map_values(map_col), 1)) > 0 -> [map_col, VALUES, OFFSET] - // Level 2 strips [map_col, VALUES, OFFSET] because [map_col, VALUES] covers it. - // - // NOT: cardinality(map_values(map_col)) — visitCardinality unwraps MapValues - // and produces [map_col, OFFSET] instead of [map_col, VALUES, OFFSET]. 
- assertAllAccessPathsContain( - "select element_at(map_col, 'a') from str_tbl" - + " where length(element_at(map_values(map_col), 1)) > 0", - ImmutableList.of(path("map_col", "KEYS"), path("map_col", "VALUES")), - ImmutableList.of(path("map_col", "VALUES", "OFFSET"))); - } - - @Test - public void testMapValuesCoversStarNullPreservesKeysPath() throws Exception { - // After map-* normalization: - // element_at(map_col, 'a') -> [map_col, KEYS] + [map_col, VALUES] - // element_at(map_col, 'a') IS NULL -> [map_col, VALUES, NULL] + [map_col, KEYS] - // element_at(map_values(map_col), 1) -> [map_col, VALUES] - // Level 1 strips [map_col, VALUES, NULL] because [map_col, VALUES] covers it. - // The KEYS path is preserved for key lookup. - assertAllAccessPathsContain( - "select element_at(map_values(map_col), 1) from str_tbl" - + " where element_at(map_col, 'a') is null", - ImmutableList.of(path("map_col", "KEYS"), path("map_col", "VALUES")), - ImmutableList.of(path("map_col", "VALUES", "NULL"))); - } - - @Test - public void testSupplementalKeyPathShouldStripExistingKeyNullPath() throws Exception { - // After map-* normalization: - // projection: [map_arr_col, VALUES] - // filter: [map_arr_col, KEYS, NULL] - // OR [map_arr_col, *, *, NULL] → [map_arr_col, VALUES, *, NULL] - // + [map_arr_col, KEYS] - // Level 1 strips [map_arr_col, KEYS, NULL] because [map_arr_col, KEYS] now - // exists before the strip phase (produced by normalization, not supplemental). - // Level 2 strips [map_arr_col, VALUES, *, NULL] because [map_arr_col, VALUES] - // covers the value-side prefix. 
- assertAllAccessPathsContain( - "select element_at(map_values(map_arr_col), 1) from map_array_tbl" - + " where element_at(map_keys(map_arr_col), 1) is null" - + " or element_at(element_at(map_arr_col, 'a'), 1) is null", - ImmutableList.of(path("map_arr_col", "KEYS"), path("map_arr_col", "VALUES")), - ImmutableList.of(path("map_arr_col", "VALUES", "*", "NULL"), - path("map_arr_col", "KEYS", "NULL"))); - } - - @Test - public void testNestedMapElementLengthKeepsValueOffsetPath() throws Exception { - assertColumn("select length(element_at(element_at(s, 'm'), 'a')) from nested_container_tbl", - "struct>", - ImmutableList.of(path("s", "m", "KEYS"), path("s", "m", "VALUES", "OFFSET")), - ImmutableList.of()); - } - - @Test - public void testNestedMapElementIsNullKeepsValueIsNullPath() throws Exception { - assertColumn("select (element_at(element_at(s, 'm'), 'a')) is null from nested_container_tbl", - "struct>", - ImmutableList.of(path("s", "m", "KEYS"), path("s", "m", "VALUES", "NULL")), - ImmutableList.of()); - } - @Test public void testFullFieldAccessStripsExactDataSkippingPath() throws Exception { assertColumn("select element_at(s, 'city') from tbl " + "where element_at(s, 'city') is null", "struct", ImmutableList.of(path("s", "city")), - ImmutableList.of()); + ImmutableList.of(path("s", "city", "NULL"))); assertColumn("select cardinality(element_at(s, 'data')), element_at(s, 'data') from tbl", "struct>>>", - ImmutableList.of(path("s", "data")), + ImmutableList.of(path("s", "data"), path("s", "data", "OFFSET")), ImmutableList.of()); assertColumn("select cardinality(a), a from nested_array_tbl", @@ -371,13 +207,12 @@ public void testFullFieldAccessStripsExactDataSkippingPath() throws Exception { assertColumn("select cardinality(map_arr_col['a']), map_arr_col['a'] from map_array_tbl", "map>", - ImmutableList.of(path("map_arr_col", "KEYS"), path("map_arr_col", "VALUES")), + ImmutableList.of(path("map_arr_col", "KEYS"), path("map_arr_col", "VALUES"), path("map_arr_col", 
"VALUES", "OFFSET")), ImmutableList.of()); } @Test - public void testCardinalityMapElementOffsetCoveredByValueFieldAccess() throws Exception { - // [s, m, *, *, verified] strips [s, m, *, OFFSET] (pure string prefix). + public void testCardinalityMapElementOffsetPredicateStaysOutOfAllAccessPaths() throws Exception { Pair> result = collectComplexSlots( "select element_at(element_at(element_at(element_at(s, 'm'), 'null'), 1), 'verified') " + "from map_array_value_tbl " @@ -388,14 +223,23 @@ public void testCardinalityMapElementOffsetCoveredByValueFieldAccess() throws Ex allAccessPaths.addAll(slotDescriptor.getAllAccessPaths()); predicateAccessPaths.addAll(slotDescriptor.getPredicateAccessPaths()); } - Assertions.assertTrue(allAccessPaths.contains(path("s", "m", "VALUES", "*", "verified"))); - Assertions.assertFalse(allAccessPaths.contains(path("s", "m", "VALUES", "OFFSET"))); - Assertions.assertFalse(predicateAccessPaths.contains(path("s", "m", "VALUES", "OFFSET"))); + Assertions.assertFalse(allAccessPaths.contains(path("s", "m", "*", "OFFSET")), + "allAccessPaths=" + allAccessPaths); + Assertions.assertTrue(allAccessPaths.contains(path("s", "m", "KEYS")), + "allAccessPaths=" + allAccessPaths); + Assertions.assertTrue(allAccessPaths.contains(path("s", "m", "VALUES", "*", "verified")), + "allAccessPaths=" + allAccessPaths); + Assertions.assertTrue(allAccessPaths.contains(path("s", "m", "VALUES", "OFFSET")), + "allAccessPaths=" + allAccessPaths); + Assertions.assertTrue(predicateAccessPaths.contains(path("s", "m", "KEYS")), + "predicateAccessPaths=" + predicateAccessPaths); + Assertions.assertTrue(predicateAccessPaths.contains(path("s", "m", "VALUES", "OFFSET")), + "predicateAccessPaths=" + predicateAccessPaths); } @Test - public void testMapElementArrayNullPathCoveredByValueFieldAccess() throws Exception { - // [s, m, *, *, verified] strips [s, m, *, NULL] (pure string prefix). 
+ public void testMapElementArrayNullPredicateStaysOutOfAllAccessPaths() throws Exception { + // The map-star NULL path expands to precise KEYS/VALUES paths instead of broad s.m.*. Pair> result = collectComplexSlots( "select element_at(element_at(element_at(element_at(s, 'm'), 'null'), 1), 'verified') " + "from map_array_value_tbl " @@ -406,9 +250,18 @@ public void testMapElementArrayNullPathCoveredByValueFieldAccess() throws Except allAccessPaths.addAll(slotDescriptor.getAllAccessPaths()); predicateAccessPaths.addAll(slotDescriptor.getPredicateAccessPaths()); } - Assertions.assertTrue(allAccessPaths.contains(path("s", "m", "VALUES", "*", "verified"))); - Assertions.assertFalse(allAccessPaths.contains(path("s", "m", "VALUES", "NULL"))); - Assertions.assertFalse(predicateAccessPaths.contains(path("s", "m", "VALUES", "NULL"))); + Assertions.assertFalse(allAccessPaths.contains(path("s", "m", "*", "NULL")), + "allAccessPaths=" + allAccessPaths); + Assertions.assertTrue(allAccessPaths.contains(path("s", "m", "KEYS")), + "allAccessPaths=" + allAccessPaths); + Assertions.assertTrue(allAccessPaths.contains(path("s", "m", "VALUES", "*", "verified")), + "allAccessPaths=" + allAccessPaths); + Assertions.assertTrue(allAccessPaths.contains(path("s", "m", "VALUES", "NULL")), + "allAccessPaths=" + allAccessPaths); + Assertions.assertTrue(predicateAccessPaths.contains(path("s", "m", "KEYS")), + "predicateAccessPaths=" + predicateAccessPaths); + Assertions.assertTrue(predicateAccessPaths.contains(path("s", "m", "VALUES", "NULL")), + "predicateAccessPaths=" + predicateAccessPaths); } @Test @@ -716,11 +569,11 @@ public void testFilter() throws Throwable { ); // The IF expression itself is not collected as a null-only parent access here; the - // struct_element predicate still lets NCP prune the scan slot to the city field. + // struct_element predicate is covered by the stripped parent path in allPaths. 
assertColumn("select 100 from tbl where if(id = 1, null, s) is not null or element_at(s, 'city') = 'beijing'", - "struct", - ImmutableList.of(path("s", "city")), - ImmutableList.of(path("s", "city")) + "struct>>>", + ImmutableList.of(path("s")), + ImmutableList.of(path("s", "NULL"), path("s", "city")) ); assertColumn("select 100 from tbl where element_at(s, 'city') is not null", @@ -789,39 +642,28 @@ public void testMapKeysAndValuesFunctionNullCheckUseParentMapNullPath() throws E assertColumn("select map_keys(map_col) from str_tbl where map_keys(map_col) is null", "map", - ImmutableList.of(path("map_col", "KEYS")), - ImmutableList.of() + ImmutableList.of(path("map_col")), + ImmutableList.of(path("map_col", "NULL")) ); assertColumn("select map_values(map_col) from str_tbl where map_values(map_col) is null", "map", - ImmutableList.of(path("map_col", "VALUES")), - ImmutableList.of() + ImmutableList.of(path("map_col")), + ImmutableList.of(path("map_col", "NULL")) ); } @Test public void testProjectFilter() throws Throwable { - assertColumn("select s from tbl where element_at(s, 'city') is not null", - "struct>>>", - ImmutableList.of(path("s")), - ImmutableList.of() - ); - assertColumn("select s from tbl where element_at(s, 'city') is null", - "struct>>>", - ImmutableList.of(path("s")), - ImmutableList.of() - ); - assertColumn("select element_at(s, 'data') from tbl where element_at(s, 'city') is not null", "struct>>>", - ImmutableList.of(path("s", "city", "NULL"), path("s", "data")), + ImmutableList.of(path("s", "city"), path("s", "data")), ImmutableList.of(path("s", "city", "NULL")) ); assertColumn("select element_at(s, 'data') from tbl where element_at(s, 'city') is not null and element_at(s, 'data') is not null", "struct>>>", - ImmutableList.of(path("s", "city", "NULL"), path("s", "data")), - ImmutableList.of(path("s", "city", "NULL")) + ImmutableList.of(path("s", "city"), path("s", "data")), + ImmutableList.of(path("s", "city", "NULL"), path("s", "data", "NULL")) ); 
} @@ -1447,7 +1289,6 @@ private void assertColumns(String sql, TreeSet actualPredicateAccessPaths = new TreeSet<>(slotDescriptor.getPredicateAccessPaths()); Assertions.assertEquals(expectPredicateAccessPathSet, actualPredicateAccessPaths); - Assertions.assertTrue(actualAllAccessPaths.containsAll(actualPredicateAccessPaths)); Map slotIdToDataTypes = new LinkedHashMap<>(); Consumer assertHasSameType = e -> { @@ -1519,25 +1360,28 @@ public void testStructIsNotNullPruning() throws Exception { @Test public void testStructIsNullMixedAccess() throws Exception { - // Parent NULL path must be stripped from allPaths when a child path is also required. - // Otherwise BE StructFileColumnIterator sees the parent NULL sub-path first, switches - // the whole struct iterator to NULL_MAP_ONLY, and skips the child iterator. - // predicateAccessPaths drops [s, NULL] too, keeping it a subset of allAccessPaths. + // Predicate metadata paths stay in predicatePaths. allPaths keeps the stripped data path + // shape when ordinary data paths also exist, so older BEs do not switch the mixed read to + // current-level metadata-only mode. assertColumn("select element_at(s, 'city') from tbl where s is null", - "struct", - ImmutableList.of(path("s", "city")), - ImmutableList.of()); + "struct>>>", + ImmutableList.of(path("s")), + ImmutableList.of(path("s", "NULL"))); + + assertColumn("select s from tbl where element_at(s, 'city') is null", + "struct>>>", + ImmutableList.of(path("s")), + ImmutableList.of(path("s", "city", "NULL"))); // This shape is closer to the production bug: one predicate needs the parent // null map, another predicate needs a child null map, and the projection needs - // a different child data path. The parent [s.NULL] cannot remain in allPaths - // with [s.data], so it is also removed from predicate paths; [s.city.NULL] stays - // because it is still present in allPaths. + // a different child data path. 
allPaths strips predicate metadata to data paths and + // collapses to the whole struct for mixed-version safety. assertColumn("select element_at(s, 'data') from tbl " + "where s is null or element_at(s, 'city') is null", "struct>>>", - ImmutableList.of(path("s", "city", "NULL"), path("s", "data")), - ImmutableList.of(path("s", "city", "NULL"))); + ImmutableList.of(path("s")), + ImmutableList.of(path("s", "NULL"), path("s", "city", "NULL"))); } @Test @@ -1578,7 +1422,7 @@ public void testStringLengthPruning() throws Exception { assertColumn( "select length(element_at(c_struct, 'f3')), element_at(c_struct, 'f3') from str_tbl", "struct", - ImmutableList.of(path("c_struct", "f3")), + ImmutableList.of(path("c_struct", "f3"), path("c_struct", "f3", "OFFSET")), ImmutableList.of()); } @@ -1742,9 +1586,19 @@ private SlotReference rewriteAndFindScanSlot(String sql, String columnName, } private void assertNoAccessPaths(SlotReference slot) { - Assertions.assertTrue(!slot.getAllAccessPaths().isPresent() || slot.getAllAccessPaths().get().isEmpty()); + String slotDebugInfo = String.format( + "slot=%s, name=%s, exprId=%s, qualifier=%s, dataType=%s, nullable=%s, " + + "subPath=%s, originalColumn=%s, allAccessPaths=%s, " + + "predicateAccessPaths=%s, displayAllAccessPaths=%s, " + + "displayPredicateAccessPaths=%s", + slot, slot.getName(), slot.getExprId(), slot.getQualifier(), slot.getDataType(), + slot.nullable(), slot.getSubPath(), slot.getOriginalColumn().map(Object::toString), + slot.getAllAccessPaths(), slot.getPredicateAccessPaths(), + slot.getDisplayAllAccessPaths(), slot.getDisplayPredicateAccessPaths()); + Assertions.assertTrue(!slot.getAllAccessPaths().isPresent() || slot.getAllAccessPaths().get().isEmpty(), + slotDebugInfo); Assertions.assertTrue(!slot.getPredicateAccessPaths().isPresent() - || slot.getPredicateAccessPaths().get().isEmpty()); + || slot.getPredicateAccessPaths().get().isEmpty(), slotDebugInfo); } private Pair> collectComplexSlots(String sql) throws 
Exception { diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index 0d8618dbc78a0f..11b8ab2d1f037b 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -500,6 +500,9 @@ struct TQueryOptions { // enable plan local exchange node in fe 223: optional bool enable_local_shuffle_planner; + + 224: optional bool enable_prune_nested_column = false; + // For cloud, to control if the content would be written into file cache // In write path, to control if the content would be written into file cache. // In read path, read from file cache or remote storage when execute query. diff --git a/regression-test/data/datatype_p0/complex_types/test_pruned_columns.out b/regression-test/data/datatype_p0/complex_types/test_pruned_columns.out index b3312aa670c066..60d6198b597288 100644 --- a/regression-test/data/datatype_p0/complex_types/test_pruned_columns.out +++ b/regression-test/data/datatype_p0/complex_types/test_pruned_columns.out @@ -1,86 +1,389 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !sql -- -1 {"city":"beijing", "data":[{1:{"a":10, "b":20}, 2:{"a":30, "b":40}}], "value":1} -2 {"city":"shanghai", "data":[{2:{"a":50, "b":40}, 1:{"a":70, "b":80}}], "value":2} -3 {"city":"guangzhou", "data":[{1:{"a":90, "b":60}, 2:{"a":110, "b":40}}], "value":3} -4 {"city":"shenzhen", "data":[{2:{"a":130, "b":20}, 1:{"a":150, "b":40}}], "value":4} -5 {"city":"hangzhou", "data":[{1:{"a":170, "b":80}, 2:{"a":190, "b":40}}], "value":5} -6 {"city":"nanjing", "data":[{2:{"a":210, "b":60}, 1:{"a":230, "b":40}}], "value":6} -7 {"city":"tianjin", "data":[{1:{"a":250, "b":20}, 2:{"a":270, "b":40}}], "value":7} -8 {"city":"chongqing", "data":[{2:{"a":290, "b":80}, 1:{"a":310, "b":40}}], "value":8} -9 {"city":"wuhan", "data":[{1:{"a":330, "b":60}, 2:{"a":350, "b":40}}], "value":9} -10 {"city":"xian", "data":[{2:{"a":370, "b":20}, 1:{"a":390, "b":40}}], "value":10} -11 {"city":"changsha", "data":[{1:{"a":410, "b":80}, 2:{"a":430, "b":40}}], "value":11} -12 {"city":"qingdao", "data":[{2:{"a":450, "b":60}, 1:{"a":470, "b":40}}], "value":12} -13 {"city":"dalian", "data":[{1:{"a":490, "b":20}, 2:{"a":510, "b":40}}], "value":13} +\N 300 +beijing 300 +chengdu 300 +guangzhou 300 +hangzhou 300 +nanjing 300 +shanghai 300 +shenzhen 300 +wuhan 300 +xian 300 -- !sql1 -- -1 [10] +1 [10, 5] + +-- !sql1_1 -- + +-- !sql1_2 -- -- !sql2 -- -1 beijing -2 shanghai +0 beijing +1 shanghai +2 shenzhen 3 guangzhou -4 shenzhen -5 hangzhou -6 nanjing -7 tianjin -8 chongqing -9 wuhan -10 xian -11 changsha -12 qingdao -13 dalian +4 hangzhou +5 chengdu +6 wuhan +7 xian +8 nanjing +9 \N +10 beijing +11 shanghai +12 shenzhen +13 guangzhou +14 hangzhou +15 chengdu +16 wuhan +17 xian +18 nanjing +19 \N + +-- !sql2_1 -- +100 beijing +101 shanghai +102 shenzhen +103 guangzhou +104 hangzhou +105 chengdu +106 wuhan +107 xian +108 nanjing +109 \N +110 beijing +111 shanghai +112 shenzhen +113 guangzhou +114 hangzhou +115 chengdu +116 wuhan +117 xian +118 
nanjing +119 \N + +-- !sql2_2 -- +2999 \N +2998 nanjing +2997 xian +2996 wuhan +2995 chengdu +2994 hangzhou +2993 guangzhou +2992 shenzhen +2991 shanghai +2990 beijing +2989 \N +2988 nanjing +2987 xian +2986 wuhan +2985 chengdu +2984 hangzhou +2983 guangzhou +2982 shenzhen +2981 shanghai +2980 beijing -- !sql3 -- -1 [{1:{"a":10, "b":20}, 2:{"a":30, "b":40}}] -2 [{2:{"a":50, "b":40}, 1:{"a":70, "b":80}}] -3 [{1:{"a":90, "b":60}, 2:{"a":110, "b":40}}] -4 [{2:{"a":130, "b":20}, 1:{"a":150, "b":40}}] -5 [{1:{"a":170, "b":80}, 2:{"a":190, "b":40}}] -6 [{2:{"a":210, "b":60}, 1:{"a":230, "b":40}}] -7 [{1:{"a":250, "b":20}, 2:{"a":270, "b":40}}] -8 [{2:{"a":290, "b":80}, 1:{"a":310, "b":40}}] -9 [{1:{"a":330, "b":60}, 2:{"a":350, "b":40}}] -10 [{2:{"a":370, "b":20}, 1:{"a":390, "b":40}}] -11 [{1:{"a":410, "b":80}, 2:{"a":430, "b":40}}] -12 [{2:{"a":450, "b":60}, 1:{"a":470, "b":40}}] -13 [{1:{"a":490, "b":20}, 2:{"a":510, "b":40}}] +0 [{1:{"a":0, "b":0}, 2:{"a":20, "b":10}}, {1:{"a":0, "b":0}, 2:{"a":0, "b":0}}] +1 [{1:{"a":10, "b":11}, 2:{"a":30, "b":20}}, {2:{"a":5, "b":2.5}, 3:{"a":3, "b":1.5}}] +2 [{1:{"a":20, "b":22}, 2:{"a":40, "b":30}}, {3:{"a":10, "b":5}, 4:{"a":6, "b":3}}] +3 [{1:{"a":30, "b":33}, 2:{"a":50, "b":40}}, {1:{"a":15, "b":7.5}, 5:{"a":9, "b":4.5}}] +4 [{1:{"a":40, "b":44}, 2:{"a":60, "b":50}}, {2:{"a":20, "b":10}, 6:{"a":12, "b":6}}] +5 [{1:{"a":50, "b":50}, 2:{"a":70, "b":60}}, {3:{"a":25, "b":12.5}, 2:{"a":15, "b":7.5}}] +6 [{1:{"a":60, "b":61}, 2:{"a":80, "b":70}}, {1:{"a":30, "b":15}, 3:{"a":18, "b":9}}] +7 [{1:{"a":70, "b":72}, 2:{"a":90, "b":80}}, {2:{"a":35, "b":17.5}, 4:{"a":21, "b":10.5}}] +8 [{1:{"a":80, "b":83}, 2:{"a":100, "b":90}}, {3:{"a":40, "b":20}, 5:{"a":24, "b":12}}] +9 [{1:{"a":90, "b":94}, 2:{"a":110, "b":100}}, {1:{"a":45, "b":22.5}, 6:{"a":27, "b":13.5}}] +10 [{1:{"a":100, "b":100}, 2:{"a":120, "b":10}}, {2:{"a":30, "b":15}}] +11 [{1:{"a":110, "b":111}, 2:{"a":130, "b":20}}, {3:{"a":33, "b":16.5}}] +12 [{1:{"a":120, "b":122}, 
2:{"a":140, "b":30}}, {1:{"a":60, "b":30}, 4:{"a":36, "b":18}}] +13 [{1:{"a":130, "b":133}, 2:{"a":150, "b":40}}, {2:{"a":65, "b":32.5}, 5:{"a":39, "b":19.5}}] +14 [{1:{"a":140, "b":144}, 2:{"a":160, "b":50}}, {3:{"a":70, "b":35}, 6:{"a":42, "b":21}}] +15 [{1:{"a":150, "b":150}, 2:{"a":170, "b":60}}, {1:{"a":75, "b":37.5}, 2:{"a":45, "b":22.5}}] +16 [{1:{"a":160, "b":161}, 2:{"a":180, "b":70}}, {2:{"a":80, "b":40}, 3:{"a":48, "b":24}}] +17 [{1:{"a":170, "b":172}, 2:{"a":190, "b":80}}, {3:{"a":85, "b":42.5}, 4:{"a":51, "b":25.5}}] +18 [{1:{"a":180, "b":183}, 2:{"a":200, "b":90}}, {1:{"a":90, "b":45}, 5:{"a":54, "b":27}}] +19 [{1:{"a":190, "b":194}, 2:{"a":210, "b":100}}, {2:{"a":95, "b":47.5}, 6:{"a":57, "b":28.5}}] + +-- !sql3_1 -- +200 [{1:{"a":2000, "b":2000}, 2:{"a":2020, "b":10}}, {3:{"a":1000, "b":500}, 2:{"a":600, "b":300}}] +201 [{1:{"a":2010, "b":2011}, 2:{"a":2030, "b":20}}, {1:{"a":1005, "b":502.5}, 3:{"a":603, "b":301.5}}] +202 [{1:{"a":2020, "b":2022}, 2:{"a":2040, "b":30}}, {2:{"a":1010, "b":505}, 4:{"a":606, "b":303}}] +203 [{1:{"a":2030, "b":2033}, 2:{"a":2050, "b":40}}, {3:{"a":1015, "b":507.5}, 5:{"a":609, "b":304.5}}] +204 [{1:{"a":2040, "b":2044}, 2:{"a":2060, "b":50}}, {1:{"a":1020, "b":510}, 6:{"a":612, "b":306}}] +205 [{1:{"a":2050, "b":2050}, 2:{"a":2070, "b":60}}, {2:{"a":615, "b":307.5}}] +206 [{1:{"a":2060, "b":2061}, 2:{"a":2080, "b":70}}, {3:{"a":618, "b":309}}] +207 [{1:{"a":2070, "b":2072}, 2:{"a":2090, "b":80}}, {1:{"a":1035, "b":517.5}, 4:{"a":621, "b":310.5}}] +208 [{1:{"a":2080, "b":2083}, 2:{"a":2100, "b":90}}, {2:{"a":1040, "b":520}, 5:{"a":624, "b":312}}] +209 [{1:{"a":2090, "b":2094}, 2:{"a":2110, "b":100}}, {3:{"a":1045, "b":522.5}, 6:{"a":627, "b":313.5}}] +210 [{1:{"a":2100, "b":2100}, 2:{"a":2120, "b":10}}, {1:{"a":1050, "b":525}, 2:{"a":630, "b":315}}] +211 [{1:{"a":2110, "b":2111}, 2:{"a":2130, "b":20}}, {2:{"a":1055, "b":527.5}, 3:{"a":633, "b":316.5}}] +212 [{1:{"a":2120, "b":2122}, 2:{"a":2140, "b":30}}, {3:{"a":1060, 
"b":530}, 4:{"a":636, "b":318}}] +213 [{1:{"a":2130, "b":2133}, 2:{"a":2150, "b":40}}, {1:{"a":1065, "b":532.5}, 5:{"a":639, "b":319.5}}] +214 [{1:{"a":2140, "b":2144}, 2:{"a":2160, "b":50}}, {2:{"a":1070, "b":535}, 6:{"a":642, "b":321}}] +215 [{1:{"a":2150, "b":2150}, 2:{"a":2170, "b":60}}, {3:{"a":1075, "b":537.5}, 2:{"a":645, "b":322.5}}] +216 [{1:{"a":2160, "b":2161}, 2:{"a":2180, "b":70}}, {1:{"a":1080, "b":540}, 3:{"a":648, "b":324}}] +217 [{1:{"a":2170, "b":2172}, 2:{"a":2190, "b":80}}, {2:{"a":1085, "b":542.5}, 4:{"a":651, "b":325.5}}] +218 [{1:{"a":2180, "b":2183}, 2:{"a":2200, "b":90}}, {3:{"a":1090, "b":545}, 5:{"a":654, "b":327}}] +219 [{1:{"a":2190, "b":2194}, 2:{"a":2210, "b":100}}, {1:{"a":1095, "b":547.5}, 6:{"a":657, "b":328.5}}] + +-- !sql3_2 -- +2999 [{1:{"a":29990, "b":29994}, 2:{"a":30010, "b":100}}, {3:{"a":14995, "b":7497.5}, 6:{"a":8997, "b":4498.5}}] +2998 [{1:{"a":29980, "b":29983}, 2:{"a":30000, "b":90}}, {2:{"a":14990, "b":7495}, 5:{"a":8994, "b":4497}}] +2997 [{1:{"a":29970, "b":29972}, 2:{"a":29990, "b":80}}, {1:{"a":14985, "b":7492.5}, 4:{"a":8991, "b":4495.5}}] +2996 [{1:{"a":29960, "b":29961}, 2:{"a":29980, "b":70}}, {3:{"a":8988, "b":4494}}] +2995 [{1:{"a":29950, "b":29950}, 2:{"a":29970, "b":60}}, {2:{"a":8985, "b":4492.5}}] +2994 [{1:{"a":29940, "b":29944}, 2:{"a":29960, "b":50}}, {1:{"a":14970, "b":7485}, 6:{"a":8982, "b":4491}}] +2993 [{1:{"a":29930, "b":29933}, 2:{"a":29950, "b":40}}, {3:{"a":14965, "b":7482.5}, 5:{"a":8979, "b":4489.5}}] +2992 [{1:{"a":29920, "b":29922}, 2:{"a":29940, "b":30}}, {2:{"a":14960, "b":7480}, 4:{"a":8976, "b":4488}}] +2991 [{1:{"a":29910, "b":29911}, 2:{"a":29930, "b":20}}, {1:{"a":14955, "b":7477.5}, 3:{"a":8973, "b":4486.5}}] +2990 [{1:{"a":29900, "b":29900}, 2:{"a":29920, "b":10}}, {3:{"a":14950, "b":7475}, 2:{"a":8970, "b":4485}}] +2989 [{1:{"a":29890, "b":29894}, 2:{"a":29910, "b":100}}, {2:{"a":14945, "b":7472.5}, 6:{"a":8967, "b":4483.5}}] +2988 [{1:{"a":29880, "b":29883}, 2:{"a":29900, 
"b":90}}, {1:{"a":14940, "b":7470}, 5:{"a":8964, "b":4482}}] +2987 [{1:{"a":29870, "b":29872}, 2:{"a":29890, "b":80}}, {3:{"a":14935, "b":7467.5}, 4:{"a":8961, "b":4480.5}}] +2986 [{1:{"a":29860, "b":29861}, 2:{"a":29880, "b":70}}, {2:{"a":14930, "b":7465}, 3:{"a":8958, "b":4479}}] +2985 [{1:{"a":29850, "b":29850}, 2:{"a":29870, "b":60}}, {1:{"a":14925, "b":7462.5}, 2:{"a":8955, "b":4477.5}}] +2984 [{1:{"a":29840, "b":29844}, 2:{"a":29860, "b":50}}, {3:{"a":14920, "b":7460}, 6:{"a":8952, "b":4476}}] +2983 [{1:{"a":29830, "b":29833}, 2:{"a":29850, "b":40}}, {2:{"a":14915, "b":7457.5}, 5:{"a":8949, "b":4474.5}}] +2982 [{1:{"a":29820, "b":29822}, 2:{"a":29840, "b":30}}, {1:{"a":14910, "b":7455}, 4:{"a":8946, "b":4473}}] +2981 [{1:{"a":29810, "b":29811}, 2:{"a":29830, "b":20}}, {3:{"a":8943, "b":4471.5}}] +2980 [{1:{"a":29800, "b":29800}, 2:{"a":29820, "b":10}}, {2:{"a":8940, "b":4470}}] -- !sql4 -- -1 [{1:{"a":10, "b":20}, 2:{"a":30, "b":40}}] -2 [{2:{"a":50, "b":40}, 1:{"a":70, "b":80}}] -3 [{1:{"a":90, "b":60}, 2:{"a":110, "b":40}}] -5 [{1:{"a":170, "b":80}, 2:{"a":190, "b":40}}] -7 [{1:{"a":250, "b":20}, 2:{"a":270, "b":40}}] -9 [{1:{"a":330, "b":60}, 2:{"a":350, "b":40}}] -11 [{1:{"a":410, "b":80}, 2:{"a":430, "b":40}}] -13 [{1:{"a":490, "b":20}, 2:{"a":510, "b":40}}] +3 [{1:{"a":30, "b":33}, 2:{"a":50, "b":40}}, {1:{"a":15, "b":7.5}, 5:{"a":9, "b":4.5}}] +13 [{1:{"a":130, "b":133}, 2:{"a":150, "b":40}}, {2:{"a":65, "b":32.5}, 5:{"a":39, "b":19.5}}] +23 [{1:{"a":230, "b":233}, 2:{"a":250, "b":40}}, {3:{"a":115, "b":57.5}, 5:{"a":69, "b":34.5}}] +33 [{1:{"a":330, "b":333}, 2:{"a":350, "b":40}}, {1:{"a":165, "b":82.5}, 5:{"a":99, "b":49.5}}] +43 [{1:{"a":430, "b":433}, 2:{"a":450, "b":40}}, {2:{"a":215, "b":107.5}, 5:{"a":129, "b":64.5}}] +53 [{1:{"a":530, "b":533}, 2:{"a":550, "b":40}}, {3:{"a":265, "b":132.5}, 5:{"a":159, "b":79.5}}] +63 [{1:{"a":630, "b":633}, 2:{"a":650, "b":40}}, {1:{"a":315, "b":157.5}, 5:{"a":189, "b":94.5}}] +73 [{1:{"a":730, "b":733}, 
2:{"a":750, "b":40}}, {2:{"a":365, "b":182.5}, 5:{"a":219, "b":109.5}}] +83 [{1:{"a":830, "b":833}, 2:{"a":850, "b":40}}, {3:{"a":415, "b":207.5}, 5:{"a":249, "b":124.5}}] +93 [{1:{"a":930, "b":933}, 2:{"a":950, "b":40}}, {1:{"a":465, "b":232.5}, 5:{"a":279, "b":139.5}}] +103 [{1:{"a":1030, "b":1033}, 2:{"a":1050, "b":40}}, {2:{"a":515, "b":257.5}, 5:{"a":309, "b":154.5}}] +113 [{1:{"a":1130, "b":1133}, 2:{"a":1150, "b":40}}, {3:{"a":565, "b":282.5}, 5:{"a":339, "b":169.5}}] +123 [{1:{"a":1230, "b":1233}, 2:{"a":1250, "b":40}}, {1:{"a":615, "b":307.5}, 5:{"a":369, "b":184.5}}] +133 [{1:{"a":1330, "b":1333}, 2:{"a":1350, "b":40}}, {2:{"a":665, "b":332.5}, 5:{"a":399, "b":199.5}}] +143 [{1:{"a":1430, "b":1433}, 2:{"a":1450, "b":40}}, {3:{"a":715, "b":357.5}, 5:{"a":429, "b":214.5}}] +153 [{1:{"a":1530, "b":1533}, 2:{"a":1550, "b":40}}, {1:{"a":765, "b":382.5}, 5:{"a":459, "b":229.5}}] +163 [{1:{"a":1630, "b":1633}, 2:{"a":1650, "b":40}}, {2:{"a":815, "b":407.5}, 5:{"a":489, "b":244.5}}] +173 [{1:{"a":1730, "b":1733}, 2:{"a":1750, "b":40}}, {3:{"a":865, "b":432.5}, 5:{"a":519, "b":259.5}}] +183 [{1:{"a":1830, "b":1833}, 2:{"a":1850, "b":40}}, {1:{"a":915, "b":457.5}, 5:{"a":549, "b":274.5}}] +193 [{1:{"a":1930, "b":1933}, 2:{"a":1950, "b":40}}, {2:{"a":965, "b":482.5}, 5:{"a":579, "b":289.5}}] + +-- !sql4_1 -- +1003 [{1:{"a":10030, "b":10033}, 2:{"a":10050, "b":40}}, {2:{"a":5015, "b":2507.5}, 5:{"a":3009, "b":1504.5}}] +1013 [{1:{"a":10130, "b":10133}, 2:{"a":10150, "b":40}}, {3:{"a":5065, "b":2532.5}, 5:{"a":3039, "b":1519.5}}] +1023 [{1:{"a":10230, "b":10233}, 2:{"a":10250, "b":40}}, {1:{"a":5115, "b":2557.5}, 5:{"a":3069, "b":1534.5}}] +1033 [{1:{"a":10330, "b":10333}, 2:{"a":10350, "b":40}}, {2:{"a":5165, "b":2582.5}, 5:{"a":3099, "b":1549.5}}] +1043 [{1:{"a":10430, "b":10433}, 2:{"a":10450, "b":40}}, {3:{"a":5215, "b":2607.5}, 5:{"a":3129, "b":1564.5}}] +1053 [{1:{"a":10530, "b":10533}, 2:{"a":10550, "b":40}}, {1:{"a":5265, "b":2632.5}, 5:{"a":3159, 
"b":1579.5}}] +1063 [{1:{"a":10630, "b":10633}, 2:{"a":10650, "b":40}}, {2:{"a":5315, "b":2657.5}, 5:{"a":3189, "b":1594.5}}] +1073 [{1:{"a":10730, "b":10733}, 2:{"a":10750, "b":40}}, {3:{"a":5365, "b":2682.5}, 5:{"a":3219, "b":1609.5}}] +1083 [{1:{"a":10830, "b":10833}, 2:{"a":10850, "b":40}}, {1:{"a":5415, "b":2707.5}, 5:{"a":3249, "b":1624.5}}] +1093 [{1:{"a":10930, "b":10933}, 2:{"a":10950, "b":40}}, {2:{"a":5465, "b":2732.5}, 5:{"a":3279, "b":1639.5}}] +1103 [{1:{"a":11030, "b":11033}, 2:{"a":11050, "b":40}}, {3:{"a":5515, "b":2757.5}, 5:{"a":3309, "b":1654.5}}] +1113 [{1:{"a":11130, "b":11133}, 2:{"a":11150, "b":40}}, {1:{"a":5565, "b":2782.5}, 5:{"a":3339, "b":1669.5}}] +1123 [{1:{"a":11230, "b":11233}, 2:{"a":11250, "b":40}}, {2:{"a":5615, "b":2807.5}, 5:{"a":3369, "b":1684.5}}] +1133 [{1:{"a":11330, "b":11333}, 2:{"a":11350, "b":40}}, {3:{"a":5665, "b":2832.5}, 5:{"a":3399, "b":1699.5}}] +1143 [{1:{"a":11430, "b":11433}, 2:{"a":11450, "b":40}}, {1:{"a":5715, "b":2857.5}, 5:{"a":3429, "b":1714.5}}] +1153 [{1:{"a":11530, "b":11533}, 2:{"a":11550, "b":40}}, {2:{"a":5765, "b":2882.5}, 5:{"a":3459, "b":1729.5}}] +1163 [{1:{"a":11630, "b":11633}, 2:{"a":11650, "b":40}}, {3:{"a":5815, "b":2907.5}, 5:{"a":3489, "b":1744.5}}] +1173 [{1:{"a":11730, "b":11733}, 2:{"a":11750, "b":40}}, {1:{"a":5865, "b":2932.5}, 5:{"a":3519, "b":1759.5}}] +1183 [{1:{"a":11830, "b":11833}, 2:{"a":11850, "b":40}}, {2:{"a":5915, "b":2957.5}, 5:{"a":3549, "b":1774.5}}] +1193 [{1:{"a":11930, "b":11933}, 2:{"a":11950, "b":40}}, {3:{"a":5965, "b":2982.5}, 5:{"a":3579, "b":1789.5}}] + +-- !sql4_2 -- +2993 [{1:{"a":29930, "b":29933}, 2:{"a":29950, "b":40}}, {3:{"a":14965, "b":7482.5}, 5:{"a":8979, "b":4489.5}}] +2983 [{1:{"a":29830, "b":29833}, 2:{"a":29850, "b":40}}, {2:{"a":14915, "b":7457.5}, 5:{"a":8949, "b":4474.5}}] +2973 [{1:{"a":29730, "b":29733}, 2:{"a":29750, "b":40}}, {1:{"a":14865, "b":7432.5}, 5:{"a":8919, "b":4459.5}}] +2963 [{1:{"a":29630, "b":29633}, 2:{"a":29650, "b":40}}, 
{3:{"a":14815, "b":7407.5}, 5:{"a":8889, "b":4444.5}}] +2953 [{1:{"a":29530, "b":29533}, 2:{"a":29550, "b":40}}, {2:{"a":14765, "b":7382.5}, 5:{"a":8859, "b":4429.5}}] +2943 [{1:{"a":29430, "b":29433}, 2:{"a":29450, "b":40}}, {1:{"a":14715, "b":7357.5}, 5:{"a":8829, "b":4414.5}}] +2933 [{1:{"a":29330, "b":29333}, 2:{"a":29350, "b":40}}, {3:{"a":14665, "b":7332.5}, 5:{"a":8799, "b":4399.5}}] +2923 [{1:{"a":29230, "b":29233}, 2:{"a":29250, "b":40}}, {2:{"a":14615, "b":7307.5}, 5:{"a":8769, "b":4384.5}}] +2913 [{1:{"a":29130, "b":29133}, 2:{"a":29150, "b":40}}, {1:{"a":14565, "b":7282.5}, 5:{"a":8739, "b":4369.5}}] +2903 [{1:{"a":29030, "b":29033}, 2:{"a":29050, "b":40}}, {3:{"a":14515, "b":7257.5}, 5:{"a":8709, "b":4354.5}}] +2893 [{1:{"a":28930, "b":28933}, 2:{"a":28950, "b":40}}, {2:{"a":14465, "b":7232.5}, 5:{"a":8679, "b":4339.5}}] +2883 [{1:{"a":28830, "b":28833}, 2:{"a":28850, "b":40}}, {1:{"a":14415, "b":7207.5}, 5:{"a":8649, "b":4324.5}}] +2873 [{1:{"a":28730, "b":28733}, 2:{"a":28750, "b":40}}, {3:{"a":14365, "b":7182.5}, 5:{"a":8619, "b":4309.5}}] +2863 [{1:{"a":28630, "b":28633}, 2:{"a":28650, "b":40}}, {2:{"a":14315, "b":7157.5}, 5:{"a":8589, "b":4294.5}}] +2853 [{1:{"a":28530, "b":28533}, 2:{"a":28550, "b":40}}, {1:{"a":14265, "b":7132.5}, 5:{"a":8559, "b":4279.5}}] +2843 [{1:{"a":28430, "b":28433}, 2:{"a":28450, "b":40}}, {3:{"a":14215, "b":7107.5}, 5:{"a":8529, "b":4264.5}}] +2833 [{1:{"a":28330, "b":28333}, 2:{"a":28350, "b":40}}, {2:{"a":14165, "b":7082.5}, 5:{"a":8499, "b":4249.5}}] +2823 [{1:{"a":28230, "b":28233}, 2:{"a":28250, "b":40}}, {1:{"a":14115, "b":7057.5}, 5:{"a":8469, "b":4234.5}}] +2813 [{1:{"a":28130, "b":28133}, 2:{"a":28150, "b":40}}, {3:{"a":14065, "b":7032.5}, 5:{"a":8439, "b":4219.5}}] +2803 [{1:{"a":28030, "b":28033}, 2:{"a":28050, "b":40}}, {2:{"a":14015, "b":7007.5}, 5:{"a":8409, "b":4204.5}}] -- !sql5 -- -1 beijing -2 shanghai 3 guangzhou -5 hangzhou -7 tianjin -9 wuhan -11 changsha -13 dalian +13 guangzhou +23 guangzhou +33 
guangzhou +43 guangzhou +53 guangzhou +63 guangzhou +73 guangzhou +83 guangzhou +93 guangzhou +103 guangzhou +113 guangzhou +123 guangzhou +133 guangzhou +143 guangzhou +153 guangzhou +163 guangzhou +173 guangzhou +183 guangzhou +193 guangzhou -- !sql5_1 -- -61 +1003 guangzhou +1013 guangzhou +1023 guangzhou +1033 guangzhou +1043 guangzhou +1053 guangzhou +1063 guangzhou +1073 guangzhou +1083 guangzhou +1093 guangzhou +1103 guangzhou +1113 guangzhou +1123 guangzhou +1133 guangzhou +1143 guangzhou +1153 guangzhou +1163 guangzhou +1173 guangzhou +1183 guangzhou +1193 guangzhou -- !sql5_2 -- +2993 guangzhou +2983 guangzhou +2973 guangzhou +2963 guangzhou +2953 guangzhou +2943 guangzhou +2933 guangzhou +2923 guangzhou +2913 guangzhou +2903 guangzhou +2893 guangzhou +2883 guangzhou +2873 guangzhou +2863 guangzhou +2853 guangzhou +2843 guangzhou +2833 guangzhou +2823 guangzhou +2813 guangzhou +2803 guangzhou + +-- !sql5_3 -- 61 +-- !sql5_4 -- +61 + +-- !sql5_5 -- +9 {"city":null, "data":[{1:{"a":90, "b":94}, 2:{"a":110, "b":100}}, {1:{"a":45, "b":22.5}, 6:{"a":27, "b":13.5}}], "value":9} +19 {"city":null, "data":[{1:{"a":190, "b":194}, 2:{"a":210, "b":100}}, {2:{"a":95, "b":47.5}, 6:{"a":57, "b":28.5}}], "value":19} +29 {"city":null, "data":[{1:{"a":290, "b":294}, 2:{"a":310, "b":100}}, {3:{"a":145, "b":72.5}, 6:{"a":87, "b":43.5}}], "value":29} +39 {"city":null, "data":[{1:{"a":390, "b":394}, 2:{"a":410, "b":100}}, {1:{"a":195, "b":97.5}, 6:{"a":117, "b":58.5}}], "value":39} +49 {"city":null, "data":[{1:{"a":490, "b":494}, 2:{"a":510, "b":100}}, {2:{"a":245, "b":122.5}, 6:{"a":147, "b":73.5}}], "value":49} +59 {"city":null, "data":[{1:{"a":590, "b":594}, 2:{"a":610, "b":100}}, {3:{"a":295, "b":147.5}, 6:{"a":177, "b":88.5}}], "value":59} +69 {"city":null, "data":[{1:{"a":690, "b":694}, 2:{"a":710, "b":100}}, {1:{"a":345, "b":172.5}, 6:{"a":207, "b":103.5}}], "value":69} +79 {"city":null, "data":[{1:{"a":790, "b":794}, 2:{"a":810, "b":100}}, {2:{"a":395, "b":197.5}, 
6:{"a":237, "b":118.5}}], "value":79} +89 {"city":null, "data":[{1:{"a":890, "b":894}, 2:{"a":910, "b":100}}, {3:{"a":445, "b":222.5}, 6:{"a":267, "b":133.5}}], "value":89} +99 {"city":null, "data":[{1:{"a":990, "b":994}, 2:{"a":1010, "b":100}}, {1:{"a":495, "b":247.5}, 6:{"a":297, "b":148.5}}], "value":99} +109 {"city":null, "data":[{1:{"a":1090, "b":1094}, 2:{"a":1110, "b":100}}, {2:{"a":545, "b":272.5}, 6:{"a":327, "b":163.5}}], "value":109} +119 {"city":null, "data":[{1:{"a":1190, "b":1194}, 2:{"a":1210, "b":100}}, {3:{"a":595, "b":297.5}, 6:{"a":357, "b":178.5}}], "value":119} +129 {"city":null, "data":[{1:{"a":1290, "b":1294}, 2:{"a":1310, "b":100}}, {1:{"a":645, "b":322.5}, 6:{"a":387, "b":193.5}}], "value":129} +139 {"city":null, "data":[{1:{"a":1390, "b":1394}, 2:{"a":1410, "b":100}}, {2:{"a":695, "b":347.5}, 6:{"a":417, "b":208.5}}], "value":139} +149 {"city":null, "data":[{1:{"a":1490, "b":1494}, 2:{"a":1510, "b":100}}, {3:{"a":745, "b":372.5}, 6:{"a":447, "b":223.5}}], "value":149} +159 {"city":null, "data":[{1:{"a":1590, "b":1594}, 2:{"a":1610, "b":100}}, {1:{"a":795, "b":397.5}, 6:{"a":477, "b":238.5}}], "value":159} +169 {"city":null, "data":[{1:{"a":1690, "b":1694}, 2:{"a":1710, "b":100}}, {2:{"a":845, "b":422.5}, 6:{"a":507, "b":253.5}}], "value":169} +179 {"city":null, "data":[{1:{"a":1790, "b":1794}, 2:{"a":1810, "b":100}}, {3:{"a":895, "b":447.5}, 6:{"a":537, "b":268.5}}], "value":179} +189 {"city":null, "data":[{1:{"a":1890, "b":1894}, 2:{"a":1910, "b":100}}, {1:{"a":945, "b":472.5}, 6:{"a":567, "b":283.5}}], "value":189} +199 {"city":null, "data":[{1:{"a":1990, "b":1994}, 2:{"a":2010, "b":100}}, {2:{"a":995, "b":497.5}, 6:{"a":597, "b":298.5}}], "value":199} + -- !sql6 -- -2 +5 12.5 +15 \N +25 \N +35 87.5 +45 \N +55 \N +65 162.5 +75 \N +85 \N +95 237.5 +105 \N +115 \N +125 312.5 +135 \N +145 \N +155 387.5 +165 \N +175 \N +185 462.5 +195 \N + +-- !sql6_1 -- +1005 \N +1015 \N +1025 2562.5 +1035 \N +1045 \N +1055 2637.5 +1065 \N +1075 \N +1085 
2712.5 +1095 \N +1105 \N +1115 2787.5 +1125 \N +1135 \N +1145 2862.5 +1155 \N +1165 \N +1175 2937.5 +1185 \N +1195 \N + +-- !sql6_2 -- +2995 \N +2985 \N +2975 7437.5 +2965 \N +2955 \N +2945 7362.5 +2935 \N +2925 \N +2915 7287.5 +2905 \N +2895 \N +2885 7212.5 +2875 \N +2865 \N +2855 7137.5 +2845 \N +2835 \N +2825 7062.5 +2815 \N +2805 \N -- !sql7 -- +2 + +-- !sql8 -- 0.41 0.99 --- !sql8 -- +-- !sql9 -- \N added_z diff --git a/regression-test/data/nereids_rules_p0/column_pruning/nested_container_offset_pruning.out b/regression-test/data/nereids_rules_p0/column_pruning/nested_container_offset_pruning.out index 8b2d33fd6e3e32..e4dc236aa24916 100644 --- a/regression-test/data/nereids_rules_p0/column_pruning/nested_container_offset_pruning.out +++ b/regression-test/data/nereids_rules_p0/column_pruning/nested_container_offset_pruning.out @@ -1,7 +1,19 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !struct_root_arr_mixed -- 1 2 10 +2 0 \N +3 1 30 -- !struct_root_map_mixed -- 1 1 x +2 6 longer +3 \N only-b + +-- !struct_root_arr_predicate_mixed -- +1 hello +3 empty + +-- !struct_root_map_predicate_mixed -- +1 x +2 longer diff --git a/regression-test/suites/datatype_p0/complex_types/test_pruned_columns.groovy b/regression-test/suites/datatype_p0/complex_types/test_pruned_columns.groovy index 4e70c26819f482..798fd368c983ab 100644 --- a/regression-test/suites/datatype_p0/complex_types/test_pruned_columns.groovy +++ b/regression-test/suites/datatype_p0/complex_types/test_pruned_columns.groovy @@ -15,7 +15,10 @@ // specific language governing permissions and limitations // under the License. 
+import org.apache.doris.regression.action.ProfileAction + suite("test_pruned_columns") { + sql "set batch_size = 32;" sql """DROP TABLE IF EXISTS `tbl_test_pruned_columns`""" sql """ CREATE TABLE `tbl_test_pruned_columns` ( @@ -23,59 +26,171 @@ suite("test_pruned_columns") { `s` struct>>, value:int> NULL ) ENGINE=OLAP DUPLICATE KEY(`id`) - DISTRIBUTED BY RANDOM BUCKETS AUTO + DISTRIBUTED BY RANDOM BUCKETS 2 PROPERTIES ( "replication_allocation" = "tag.location.default: 1" ); """ sql """ - insert into `tbl_test_pruned_columns` values - (1, named_struct('city', 'beijing', 'data', array(map(1, named_struct('a', 10, 'b', 20.0), 2, named_struct('a', 30, 'b', 40))), 'value', 1)), - (2, named_struct('city', 'shanghai', 'data', array(map(2, named_struct('a', 50, 'b', 40.0), 1, named_struct('a', 70, 'b', 80))), 'value', 2)), - (3, named_struct('city', 'guangzhou', 'data', array(map(1, named_struct('a', 90, 'b', 60.0), 2, named_struct('a', 110, 'b', 40))), 'value', 3)), - (4, named_struct('city', 'shenzhen', 'data', array(map(2, named_struct('a', 130, 'b', 20.0), 1, named_struct('a', 150, 'b', 40))), 'value', 4)), - (5, named_struct('city', 'hangzhou', 'data', array(map(1, named_struct('a', 170, 'b', 80.0), 2, named_struct('a', 190, 'b', 40))), 'value', 5)), - (6, named_struct('city', 'nanjing', 'data', array(map(2, named_struct('a', 210, 'b', 60.0), 1, named_struct('a', 230, 'b', 40))), 'value', 6)), - (7, named_struct('city', 'tianjin', 'data', array(map(1, named_struct('a', 250, 'b', 20.0), 2, named_struct('a', 270, 'b', 40))), 'value', 7)), - (8, named_struct('city', 'chongqing', 'data', array(map(2, named_struct('a', 290, 'b', 80.0), 1, named_struct('a', 310, 'b', 40))), 'value', 8)), - (9, named_struct('city', 'wuhan', 'data', array(map(1, named_struct('a', 330, 'b', 60.0), 2, named_struct('a', 350, 'b', 40))), 'value', 9)), - (10, named_struct('city', 'xian', 'data', array(map(2, named_struct('a', 370, 'b', 20.0), 1, named_struct('a', 390, 'b', 40))), 'value', 10)), 
- (11, named_struct('city', 'changsha', 'data', array(map(1, named_struct('a', 410, 'b', 80.0), 2, named_struct('a', 430, 'b', 40))), 'value', 11)), - (12, named_struct('city', 'qingdao', 'data', array(map(2, named_struct('a', 450, 'b', 60.0), 1, named_struct('a', 470, 'b', 40))), 'value', 12)), - (13, named_struct('city', 'dalian', 'data', array(map(1, named_struct('a', 490, 'b', 20.0), 2, named_struct('a', 510, 'b', 40))), 'value', 13)); + insert into `tbl_test_pruned_columns` + select + number as id, + named_struct( + 'city', + case (number % 10) + when 0 then 'beijing' + when 1 then 'shanghai' + when 2 then 'shenzhen' + when 3 then 'guangzhou' + when 4 then 'hangzhou' + when 5 then 'chengdu' + when 6 then 'wuhan' + when 7 then 'xian' + when 8 then 'nanjing' + else null + end, + 'data', + array( + map( + 1, named_struct('a', number * 10, 'b', (number * 10 + number % 5) * 1.0), + 2, named_struct('a', number * 10 + 20, 'b', (number % 10 + 1) * 10.0) + ), + map( + (number % 3 + 1), named_struct('a', number * 5, 'b', number * 2.5), + (number % 5 + 2), named_struct('a', number * 3, 'b', number * 1.5) + ) + ), + 'value', + number + ) as s + from numbers("number" = "3000"); """ qt_sql """ - select * from `tbl_test_pruned_columns` order by 1; + select element_at(s, 'city'), count() from `tbl_test_pruned_columns` group by element_at(s, 'city') order by 1, 2; """ qt_sql1 """ - select b.id, array_map(x -> element_at(map_values(x)[1], 'a'), element_at(s, 'data')) from `tbl_test_pruned_columns` t join (select 1 id) b on t.id = b.id order by 1; + select + b.id + , array_map(x -> element_at(map_values(x)[1], 'a') + , element_at(s, 'data')) + from `tbl_test_pruned_columns` t join (select 1 id) b on t.id = b.id + order by 1, 2 limit 0, 20; + """ + + qt_sql1_1 """ + select + b.id + , array_map(x -> element_at(map_values(x)[1], 'a') + , element_at(s, 'data')) + from `tbl_test_pruned_columns` t join (select 1 id) b on t.id = b.id + order by 1, 2 limit 100, 20; + """ + + qt_sql1_2 
""" + select + b.id + , array_map(x -> element_at(map_values(x)[1], 'a') + , element_at(s, 'data')) + from `tbl_test_pruned_columns` t join (select 1 id) b on t.id = b.id + order by 1 desc, 2 limit 100, 20; """ qt_sql2 """ - select id, element_at(s, 'city') from `tbl_test_pruned_columns` order by 1; + select id, element_at(s, 'city') from `tbl_test_pruned_columns` order by 1 limit 0, 20; + """ + + qt_sql2_1 """ + select id, element_at(s, 'city') from `tbl_test_pruned_columns` order by 1 limit 100, 20; + """ + + qt_sql2_2 """ + select id, element_at(s, 'city') from `tbl_test_pruned_columns` order by 1 desc limit 0, 20; """ qt_sql3 """ - select id, element_at(s, 'data') from `tbl_test_pruned_columns` order by 1; + select id, element_at(s, 'data') from `tbl_test_pruned_columns` order by 1 limit 0, 20; + """ + + qt_sql3_1 """ + select id, element_at(s, 'data') from `tbl_test_pruned_columns` order by 1 limit 200, 20; + """ + + qt_sql3_2 """ + select id, element_at(s, 'data') from `tbl_test_pruned_columns` order by 1 desc limit 0, 20; """ qt_sql4 """ - select id, element_at(s, 'data') from `tbl_test_pruned_columns` where element_at(element_at(s, 'data')[1][2], 'b') = 40 order by 1; + select + id + , element_at(s, 'data') + from `tbl_test_pruned_columns` + where element_at(element_at(s, 'data')[1][2], 'b') = 40 + order by 1 limit 0, 20; + """ + + qt_sql4_1 """ + select + id + , element_at(s, 'data') + from `tbl_test_pruned_columns` + where element_at(element_at(s, 'data')[1][2], 'b') = 40 + order by 1 limit 100, 20; + """ + + qt_sql4_2 """ + select + id + , element_at(s, 'data') + from `tbl_test_pruned_columns` + where element_at(element_at(s, 'data')[1][2], 'b') = 40 + order by 1 desc limit 0, 20; """ qt_sql5 """ - select id, element_at(s, 'city') from `tbl_test_pruned_columns` where element_at(element_at(s, 'data')[1][2], 'b') = 40 order by 1; + select + id + , element_at(s, 'city') + from `tbl_test_pruned_columns` + where element_at(element_at(s, 'data')[1][2], 'b') = 
40 + order by 1, 2 limit 0, 20; """ qt_sql5_1 """ - select /*+ set enable_prune_nested_column = 1; */ sum(s.value) from `tbl_test_pruned_columns` where id in(1,2,3,4,8,9,10,11,13); + select + id + , element_at(s, 'city') + from `tbl_test_pruned_columns` + where element_at(element_at(s, 'data')[1][2], 'b') = 40 + order by 1, 2 limit 100, 20; """ qt_sql5_2 """ - select /*+ set enable_prune_nested_column = 0; */ sum(s.value) from `tbl_test_pruned_columns` where id in(1,2,3,4,8,9,10,11,13); + select + id + , element_at(s, 'city') + from `tbl_test_pruned_columns` + where element_at(element_at(s, 'data')[1][2], 'b') = 40 + order by 1 desc, 2 limit 0, 20; + """ + + qt_sql5_3 """ + select /*+ SET_VAR(enable_prune_nested_column=true) */ sum(s.value) from `tbl_test_pruned_columns` where id in(1,2,3,4,8,9,10,11,13); + """ + + qt_sql5_4 """ + select /*+ SET_VAR(enable_prune_nested_column=false) */ sum(s.value) from `tbl_test_pruned_columns` where id in(1,2,3,4,8,9,10,11,13); + """ + + qt_sql5_5 """ + select + id + , s + from `tbl_test_pruned_columns` + where element_at(s, 'city') is null + order by 1 limit 0, 20; """ sql """DROP TABLE IF EXISTS `tbl_test_pruned_columns_map`""" @@ -98,10 +213,102 @@ suite("test_pruned_columns") { """ qt_sql6 """ - select count(element_at(dynamic_attributes['theme_preference'], 'confidence_score')) from `tbl_test_pruned_columns_map`; + select + id + , element_at(element_at(s, 'data')[2][3], 'b') + from `tbl_test_pruned_columns` + where element_at(s, 'city') = 'chengdu' + order by 1, 2 limit 0, 20; + """ + + sql "set enable_profile = true" + sql "set profile_level = 2" + sql "set enable_common_expr_pushdown = true" + sql "set enable_prune_nested_column = true" + + def lazyPrunedToken = "lazy_pruned_column_recovery_" + UUID.randomUUID().toString() + sql """ + select + "${lazyPrunedToken}" + , id + , element_at(s, 'data') + from `tbl_test_pruned_columns` + where element_at(s, 'city') = 'chengdu' + order by 1 limit 0, 20; + """ + + def profileAction 
= new ProfileAction(context) + def profileCompletionStateName = "Profile Completion State" + def profileCompletionStateComplete = "COMPLETE" + def lazyPrunedCounterName = "LazyReadPrunedTime" + def lazyPrunedProfile = "" + def lazyPrunedProfileState = "" + for (int attempt = 0; attempt < 60; attempt++) { + for (def profileItem : profileAction.getProfileList()) { + if (profileItem["Sql Statement"].toString().contains(lazyPrunedToken)) { + lazyPrunedProfileState = profileItem[profileCompletionStateName]?.toString() + def currentProfile = profileAction.getProfile(profileItem["Profile ID"].toString()) + if (currentProfile != null && !currentProfile.isEmpty()) { + lazyPrunedProfile = currentProfile + } + break + } + } + if (lazyPrunedProfileState == profileCompletionStateComplete + && lazyPrunedProfile.contains(lazyPrunedCounterName)) { + break + } + Thread.sleep(500) + } + assertTrue(lazyPrunedProfile != null && !lazyPrunedProfile.isEmpty(), + "profile not found for ${lazyPrunedToken}") + assertTrue(lazyPrunedProfileState == profileCompletionStateComplete, + "profile is not complete for ${lazyPrunedToken}, state: ${lazyPrunedProfileState}") + logger.info("${lazyPrunedToken} profile: ${lazyPrunedProfile}") + + def lazyPrunedTimer = (lazyPrunedProfile =~ /${lazyPrunedCounterName}:\s*([0-9.]+)(ns|us|ms|s)/) + boolean foundLazyPrunedTimer = false + boolean nonZeroLazyPrunedTimer = false + while (lazyPrunedTimer.find()) { + foundLazyPrunedTimer = true + if ((lazyPrunedTimer.group(1) as BigDecimal) > 0) { + nonZeroLazyPrunedTimer = true + break + } + } + assertTrue(foundLazyPrunedTimer, + "LazyReadPrunedTime not found in profile for ${lazyPrunedToken}") + assertTrue(nonZeroLazyPrunedTimer, + "LazyReadPrunedTime is zero in profile for ${lazyPrunedToken}: ${lazyPrunedProfile}") + + qt_sql6_1 """ + select + id + , element_at(element_at(s, 'data')[2][3], 'b') + from `tbl_test_pruned_columns` + where element_at(s, 'city') = 'chengdu' + order by 1, 2 limit 100, 20; """ + qt_sql6_2 
""" + select + id + , element_at(element_at(s, 'data')[2][3], 'b') + from `tbl_test_pruned_columns` + where element_at(s, 'city') = 'chengdu' + order by 1 desc, 2 limit 0, 20; + """ + + sql "set enable_profile = false" + sql "unset variable profile_level" + sql "set enable_common_expr_pushdown = false" + sql "set enable_prune_nested_column = false" + qt_sql7 """ + select count(element_at(dynamic_attributes['theme_preference'], 'confidence_score')) from `tbl_test_pruned_columns_map`; + """ + + qt_sql8 """ select element_at(dynamic_attributes['theme_preference'], 'confidence_score') from `tbl_test_pruned_columns_map` order by id; """ @@ -113,12 +320,12 @@ suite("test_pruned_columns") { `s_info` STRUCT, `arr_s` ARRAY>, `map_s` MAP> - ) - UNIQUE KEY(`id`) - DISTRIBUTED BY HASH(`id`) BUCKETS 4 + ) + UNIQUE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 4 PROPERTIES ( "replication_num" = "1", - "light_schema_change" = "true" + "light_schema_change" = "true" ); """ sql """ @@ -134,7 +341,7 @@ suite("test_pruned_columns") { INSERT INTO nested_sc_tbl VALUES (3, struct(30.5, 'v3', 888), array(struct(500, 600, 'added_z'), struct(501, 601, 'added_z_2')), map('k3', struct(3, 3.3))); """ - qt_sql8 """ + qt_sql9 """ select element_at(element_at(arr_s, 1), 'z') as inner_z FROM nested_sc_tbl ORDER BY id; """ -} \ No newline at end of file +} diff --git a/regression-test/suites/nereids_rules_p0/column_pruning/lambda_null_pruning.groovy b/regression-test/suites/nereids_rules_p0/column_pruning/lambda_null_pruning.groovy index 2d55528f184e9f..923a7b66421c62 100644 --- a/regression-test/suites/nereids_rules_p0/column_pruning/lambda_null_pruning.groovy +++ b/regression-test/suites/nereids_rules_p0/column_pruning/lambda_null_pruning.groovy @@ -39,7 +39,8 @@ suite("lambda_null_pruning") { // Case 1: single-variable constant lambda body + IS NULL // body = Literal(true), array item variable unreferenced // collectArrayPathInLambda won't register full-access path. 
- // If IS NULL already registered [a.NULL], pruning goes wrong. + // The array data path must be present for array_count(); the NULL metadata path may remain + // in the plan and is consumed at the array level. // ================================================================ explain { sql """ @@ -47,7 +48,8 @@ suite("lambda_null_pruning") { FROM lambda_null_pruning_tbl ORDER BY id """ contains "nested columns" - notContains "a.NULL" + contains "a.*" + contains "a.NULL" } order_qt_case1 """ @@ -61,7 +63,8 @@ suite("lambda_null_pruning") { // x -> body references -> visitArrayItemSlot fires -> [a, *] OK // y -> body does NOT reference -> visitArrayItemSlot missing // b IS NULL -> [b, NULL] registered -> bug triggered - // After fix: fallback adds [b, *] for unreferenced y + // After fix: fallback adds [b, *] for unreferenced y. NULL metadata paths may still remain + // beside the full array data paths. // ================================================================ explain { sql """ @@ -70,8 +73,10 @@ suite("lambda_null_pruning") { FROM lambda_null_pruning_tbl ORDER BY id """ contains "nested columns" - notContains "a.NULL" - notContains "b.NULL" + contains "a.*" + contains "a.NULL" + contains "b.*" + contains "b.NULL" } order_qt_case2 """ @@ -92,8 +97,10 @@ suite("lambda_null_pruning") { FROM lambda_null_pruning_tbl ORDER BY id """ contains "nested columns" - notContains "a.NULL" - notContains "b.NULL" + contains "a.*" + contains "a.NULL" + contains "b.*" + contains "b.NULL" } order_qt_case3 """ @@ -111,7 +118,8 @@ suite("lambda_null_pruning") { FROM lambda_null_pruning_tbl ORDER BY id """ contains "nested columns" - notContains "a.NULL" + // array_filter returns the array itself, so the full array path covers the null flag. 
+ contains "all access paths: [a]" } order_qt_case4 """ @@ -128,7 +136,8 @@ suite("lambda_null_pruning") { FROM lambda_null_pruning_tbl ORDER BY id """ contains "nested columns" - notContains "a.NULL" + contains "a.*" + contains "a.NULL" } // ================================================================ @@ -140,7 +149,8 @@ suite("lambda_null_pruning") { FROM lambda_null_pruning_tbl ORDER BY id """ contains "nested columns" - notContains "a.OFFSET" + contains "a.*" + contains "a.OFFSET" } order_qt_case6 """ diff --git a/regression-test/suites/nereids_rules_p0/column_pruning/map_contains_arg_pruning.groovy b/regression-test/suites/nereids_rules_p0/column_pruning/map_contains_arg_pruning.groovy index ea2a51778db0fc..73e04b8d87a2ff 100644 --- a/regression-test/suites/nereids_rules_p0/column_pruning/map_contains_arg_pruning.groovy +++ b/regression-test/suites/nereids_rules_p0/column_pruning/map_contains_arg_pruning.groovy @@ -47,9 +47,8 @@ suite("map_contains_arg_pruning") { // ================================================================ // Case 1: map_contains_key + element_at IS NULL (original bug) - // map_contains_key(m, element_at(s, 'a')) needs full access to s.a - // as the key lookup value. Without fix, only [s.a.NULL] from - // element_at(s, 'a') IS NULL is registered. + // map_contains_key(m, element_at(s, 'a')) needs full access to s.a as the key lookup value. + // The NULL metadata path from element_at(s, 'a') IS NULL may remain beside the data path. 
// ================================================================ explain { sql """ @@ -60,7 +59,7 @@ suite("map_contains_arg_pruning") { """ contains "nested columns" contains "s.a" // s.a should appear in access paths - notContains "s.a.NULL" // should NOT be null-only + contains "s.a.NULL" // NULL metadata may remain beside full data contains "m.KEYS" // map_contains_key needs KEYS path } @@ -84,7 +83,7 @@ suite("map_contains_arg_pruning") { """ contains "nested columns" contains "s.b" // s.b should appear in access paths - notContains "s.b.NULL" // should NOT be null-only + contains "s.b.NULL" // NULL metadata may remain beside full data } order_qt_case2 """ @@ -112,8 +111,8 @@ suite("map_contains_arg_pruning") { contains "nested columns" contains "s.a" contains "s.b" - notContains "s.a.NULL" - notContains "s.b.NULL" + contains "s.a.NULL" + contains "s.b.NULL" } order_qt_case3 """ diff --git a/regression-test/suites/nereids_rules_p0/column_pruning/nested_container_offset_pruning.groovy b/regression-test/suites/nereids_rules_p0/column_pruning/nested_container_offset_pruning.groovy index 4439bf03f83689..b0cc2d2eef8e00 100644 --- a/regression-test/suites/nereids_rules_p0/column_pruning/nested_container_offset_pruning.groovy +++ b/regression-test/suites/nereids_rules_p0/column_pruning/nested_container_offset_pruning.groovy @@ -30,21 +30,36 @@ suite("nested_container_offset_pruning") { PROPERTIES ("replication_allocation" = "tag.location.default: 1") """ sql """ - INSERT INTO nested_container_offset_pruning_tbl VALUES ( - 1, - named_struct( - 'arr', array( - named_struct('str_field', 'hello', 'int_field', 10), - named_struct('str_field', 'world', 'int_field', 20) - ), - 'm', {'a': 'x', 'b': 'y'} + INSERT INTO nested_container_offset_pruning_tbl VALUES + ( + 1, + named_struct( + 'arr', array( + named_struct('str_field', 'hello', 'int_field', 10), + named_struct('str_field', 'world', 'int_field', 20) + ), + 'm', {'a': 'x', 'b': 'y'} + ) + ), + ( + 2, + named_struct( 
+ 'arr', array(), + 'm', {'a': 'longer', 'c': ''} + ) + ), + ( + 3, + named_struct( + 'arr', array(named_struct('str_field', 'empty', 'int_field', 30)), + 'm', {'b': 'only-b'} + ) ) - ) """ - // cardinality(s.arr) only needs array offsets, but element_at(...).int_field also needs - // array item data. The redundant s.arr.OFFSET path must be removed even though the root slot - // itself is STRUCT. + // cardinality(s.arr) needs array offsets, and element_at(...).int_field also needs array item + // data. Keep both paths: the BE consumes the current array-level metadata at the array + // iterator without forwarding it to the item iterator. order_qt_struct_root_arr_mixed """ SELECT id, cardinality(element_at(s, 'arr')), @@ -52,9 +67,8 @@ suite("nested_container_offset_pruning") { FROM nested_container_offset_pruning_tbl ORDER BY id """ - // Same issue for nested maps: length(element_at(s.m, 'a')) needs the key lookup path, - // while map_values(s.m)[1] needs full value data. Dedup must therefore keep KEYS + VALUES - // and drop only the redundant value-side OFFSET path under the nested map container. + // Same issue for nested maps: length(element_at(s.m, 'a')) needs the key lookup path and + // value-string offsets, while map_values(s.m)[1] needs full value data. order_qt_struct_root_map_mixed """ SELECT id, length(element_at(element_at(s, 'm'), 'a')), @@ -62,6 +76,26 @@ suite("nested_container_offset_pruning") { FROM nested_container_offset_pruning_tbl ORDER BY id """ + // Predicate on array offsets + output from array item should keep both current-level metadata + // and child data paths across FE access-path construction and BE iterator routing. 
+ order_qt_struct_root_arr_predicate_mixed """ + SELECT id, + element_at(element_at(element_at(s, 'arr'), 1), 'str_field') + FROM nested_container_offset_pruning_tbl + WHERE cardinality(element_at(s, 'arr')) >= 1 + ORDER BY id + """ + + // Predicate length(map['a']) needs KEYS + VALUES.OFFSET, while projecting map['a'] still needs + // the full value data for the same map branch. + order_qt_struct_root_map_predicate_mixed """ + SELECT id, + element_at(element_at(s, 'm'), 'a') + FROM nested_container_offset_pruning_tbl + WHERE length(element_at(element_at(s, 'm'), 'a')) >= 1 + ORDER BY id + """ + explain { sql """ SELECT cardinality(element_at(s, 'arr')), @@ -69,7 +103,7 @@ suite("nested_container_offset_pruning") { FROM nested_container_offset_pruning_tbl """ contains "s.arr.*.int_field" - notContains "s.arr.OFFSET" + contains "s.arr.OFFSET" } explain { @@ -80,6 +114,26 @@ suite("nested_container_offset_pruning") { """ contains "s.m.KEYS" contains "s.m.VALUES" - notContains "OFFSET" + contains "s.m.VALUES.OFFSET" + } + + explain { + sql """ + SELECT element_at(element_at(element_at(s, 'arr'), 1), 'str_field') + FROM nested_container_offset_pruning_tbl + WHERE cardinality(element_at(s, 'arr')) >= 1 + """ + contains "s.arr.*.str_field" + contains "s.arr.OFFSET" + } + + explain { + sql """ + SELECT element_at(element_at(s, 'm'), 'a') + FROM nested_container_offset_pruning_tbl + WHERE length(element_at(element_at(s, 'm'), 'a')) >= 1 + """ + contains "all access paths: [s.m.KEYS, s.m.VALUES, s.m.VALUES.OFFSET]" + contains "predicate access paths: [s.m.KEYS, s.m.VALUES.OFFSET]" } } diff --git a/regression-test/suites/nereids_rules_p0/column_pruning/null_column_pruning.groovy b/regression-test/suites/nereids_rules_p0/column_pruning/null_column_pruning.groovy index 69ba137d4afe38..6e22ae943e0ebd 100644 --- a/regression-test/suites/nereids_rules_p0/column_pruning/null_column_pruning.groovy +++ 
b/regression-test/suites/nereids_rules_p0/column_pruning/null_column_pruning.groovy @@ -23,9 +23,8 @@ // The EXPLAIN plan should show: // nested columns: : all access paths: [.NULL] // -// When the same column is also accessed for data (e.g., projected or used in -// element_at), the NULL-only path must be stripped from allAccessPaths and -// predicateAccessPaths unless the same path is still present in allAccessPaths. +// When the same column is also accessed for data (e.g., projected or used in element_at), +// allAccessPaths keep the data path and predicateAccessPaths keep the predicate metadata path. suite("null_column_pruning") { sql """ DROP TABLE IF EXISTS ncp_tbl """ @@ -123,8 +122,7 @@ suite("null_column_pruning") { sql "select id, arr_col from ncp_tbl where arr_col is null" contains "nested columns" contains "all access paths: [arr_col]" - notContains "arr_col.NULL" - notContains "predicate access paths:" + contains "predicate access paths: [arr_col.NULL]" } order_qt_array_full_access_strips_null """ @@ -145,8 +143,7 @@ suite("null_column_pruning") { sql "select id, map_col from ncp_tbl where map_col is null" contains "nested columns" contains "all access paths: [map_col]" - notContains "map_col.NULL" - notContains "predicate access paths:" + contains "predicate access paths: [map_col.NULL]" } order_qt_map_full_access_strips_null """ @@ -186,44 +183,38 @@ suite("null_column_pruning") { order_qt_10 "select int_col from ncp_tbl where int_col is null"; // ─── Mixed: struct IS NULL + partial field access ─────────────────────────── - // struct_col IS NULL in WHERE + element_at in SELECT → child data is also needed. - // The parent struct_col.NULL path must NOT stay in allAccessPaths with child paths. - // BE StructFileColumnIterator treats a leading NULL sub-path as NULL_MAP_ONLY; if - // allAccessPaths were [struct_col.NULL, struct_col.city], BE would skip the city - // child iterator and default-fill the projected value. 
The normal nullable struct - // read materializes the parent null map together with child data, and - // predicateAccessPaths is filtered so it remains a subset of allAccessPaths. + // struct_col IS NULL in WHERE + element_at in SELECT needs struct data for projection, while + // the predicate keeps the parent null map separately. explain { sql "select element_at(struct_col, 'city') from ncp_tbl where struct_col is null" contains "nested columns" - contains "all access paths: [struct_col.city]" - notContains "predicate access paths:" + contains "all access paths: [struct_col]" + contains "predicate access paths: [struct_col.NULL]" } order_qt_11 "select element_at(struct_col, 'city') from ncp_tbl where struct_col is null"; - // This query verifies the real correctness risk: one branch needs the parent null - // map, another branch needs a child null map, and the projection needs another - // child data path. Keeping struct_col.NULL in allAccessPaths would put BE in - // NULL_MAP_ONLY mode for the whole struct and return the default zip value instead - // of reading the zip child column. + // This query verifies the real correctness risk: predicate paths need both parent and child + // null maps, while allAccessPaths keeps the struct data path for projection. 
explain { sql "select element_at(struct_col, 'zip') from ncp_tbl where struct_col is null or element_at(struct_col, 'city') is null" contains "nested columns" - contains "all access paths: [struct_col.city.NULL, struct_col.zip]" - contains "predicate access paths: [struct_col.city.NULL]" + contains "all access paths: [struct_col]" + contains "predicate access paths:" + contains "struct_col.NULL" + contains "struct_col.city.NULL" } order_qt_parent_null_with_child_data "select element_at(struct_col, 'zip') from ncp_tbl where struct_col is null or element_at(struct_col, 'city') is null"; // ─── Non-optimizable: struct IS NULL + full struct projected ──────────────── - // Full struct access covers its own null flag, so [struct_col.NULL] is stripped - // from allAccessPaths but kept in predicateAccessPaths. + // Full struct access stays in allAccessPaths. The predicate keeps [struct_col.NULL] + // in predicateAccessPaths. explain { sql "select struct_col from ncp_tbl where struct_col is null" contains "nested columns" contains "all access paths: [struct_col]" - notContains "predicate access paths:" + contains "predicate access paths: [struct_col.NULL]" } order_qt_12 "select struct_col from ncp_tbl where struct_col is null"; @@ -231,13 +222,12 @@ suite("null_column_pruning") { // ─── Nested struct field IS NULL ──────────────────────────────────────────── // element_at(struct_col, 'city') IS NULL should produce a null-flag-only // predicate path [struct_col.city.NULL] while the projection reads city data. - // [struct_col.city.NULL] is stripped from allAccessPaths because [struct_col.city] - // covers the same prefix (full city data includes its null flag). + // [struct_col.city.NULL] remains in predicateAccessPaths beside the projected city data path. 
explain { sql "select element_at(struct_col, 'city') from ncp_tbl where element_at(struct_col, 'city') is null" contains "nested columns" - contains "all access paths: [struct_col.city]" - notContains "predicate access paths:" + contains "struct_col.city" + contains "predicate access paths: [struct_col.city.NULL]" } order_qt_13 "select element_at(struct_col, 'city') from ncp_tbl where element_at(struct_col, 'city') is null"; @@ -400,26 +390,25 @@ suite("null_column_pruning") { order_qt_24 "select count(1) from ncp_tbl where element_at(struct_col, 'city') is not null"; // ─── Mixed: map_keys IS NULL + map_keys projected ────────────────────────── - // Projection needs key data, while the predicate checks whether the parent map - // is NULL. The parent NULL path must not stay in either access path list, so BE - // does not switch the whole map iterator to NULL_MAP_ONLY and skip the keys child. + // Projection needs map data, while the predicate checks whether the parent map is NULL. The + // parent NULL path stays in predicateAccessPaths. explain { sql "select map_keys(map_col) from ncp_tbl where map_keys(map_col) is null" contains "nested columns" - contains "all access paths: [map_col.KEYS]" - notContains "predicate access paths:" + contains "all access paths: [map_col]" + contains "predicate access paths: [map_col.NULL]" } order_qt_25 "select map_keys(map_col) from ncp_tbl where map_keys(map_col) is null"; // ─── Mixed: map_values IS NULL + map_values projected ────────────────────── - // Projection needs value data, while the predicate checks whether the parent + // Projection needs map data, while the predicate checks whether the parent // map is NULL. A NULL value element does not make map_values(map_col) NULL. 
explain { sql "select map_values(map_col) from ncp_tbl where map_values(map_col) is null" contains "nested columns" - contains "all access paths: [map_col.VALUES]" - notContains "predicate access paths:" + contains "all access paths: [map_col]" + contains "predicate access paths: [map_col.NULL]" } order_qt_26 "select map_values(map_col) from ncp_tbl where map_values(map_col) is null"; @@ -526,13 +515,13 @@ suite("null_column_pruning") { order_qt_33 "select 1 from ncp_tbl_nn where id is null"; // ─── length(str_col) = 0 OR str_col IS NULL ──────────────────────────────── - // length(str_col) already uses the OFFSET path, and BE can derive null-ness - // from that layout, so the extra NULL-only path is redundant. + // The OR predicate needs both pieces of metadata: length(str_col) uses OFFSET and + // str_col IS NULL uses NULL. Keep both paths in the predicate metadata set. explain { sql "select 1 from ncp_tbl where length(str_col) = 0 or str_col is null" contains "nested columns" contains "str_col.OFFSET" - notContains "str_col.NULL" + contains "str_col.NULL" } order_qt_34 "select 1 from ncp_tbl where length(str_col) = 0 or str_col is null"; diff --git a/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy b/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy index e0159f749f2ba8..046a422992b017 100644 --- a/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy +++ b/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy @@ -76,7 +76,7 @@ suite("string_length_column_pruning") { sql "select if(length(str_col) >= 5, true, false) a from slcp_str_tbl order by id" contains "nested columns" contains "str_col.OFFSET" - notContains "str_col.NULL" + contains "str_col.NULL" notContains "all access paths: [id]" } sql "select if(length(str_col) >= 5, true, false) a from slcp_str_tbl order by id" @@ -146,10 +146,12 @@ 
suite("string_length_column_pruning") { notContains "type=bigint" } sql "select sum(cardinality(arr_col)) from slcp_str_tbl" - // arr_col also accessed via element_at → full element data needed, OFFSET suppressed. + // arr_col is also accessed via element_at, so full element data is needed. Keep OFFSET + // as well because cardinality(arr_col) still needs array offset metadata. explain { sql "select cardinality(arr_col), arr_col[1] from slcp_str_tbl" - notContains "OFFSET" + contains "arr_col.*" + contains "arr_col.OFFSET" notContains "type=bigint" } @@ -262,29 +264,35 @@ suite("string_length_column_pruning") { notContains "type=bigint" } - // value also accessed directly (arr[0]) → full VALUES needed, OFFSET suppressed + // Value is also accessed directly (arr[0]), so full VALUES element data is needed. Keep + // VALUES.OFFSET as well because cardinality(map_arr_col['a']) still needs array offset metadata. explain { sql "select cardinality(map_arr_col['a']), map_arr_col['b'][0] from slcp_str_tbl" - notContains "OFFSET" + contains "map_arr_col.KEYS" + contains "map_arr_col.VALUES.*" + contains "map_arr_col.VALUES.OFFSET" notContains "type=bigint" } - // [map_arr_struct_col, VALUES, *, verified] strips [map_arr_struct_col, VALUES, OFFSET]. + // The struct field is also accessed directly, so keep only the required verified field data. + // VALUES.OFFSET is still needed by cardinality(map_arr_struct_col['a']). explain { sql "select cardinality(map_arr_struct_col['a']), map_arr_struct_col['a'][1].verified from slcp_str_tbl" contains "nested columns" contains "map_arr_struct_col.KEYS" contains "map_arr_struct_col.VALUES.*.verified" - notContains "map_arr_struct_col.VALUES.OFFSET" + contains "map_arr_struct_col.VALUES.OFFSET" notContains "type=bigint" } - // [map_arr_col, VALUES] strips [map_arr_col, VALUES, OFFSET]. + // Returning map_arr_col['a'] needs full VALUES data. 
Keep VALUES.OFFSET too because + // cardinality(map_arr_col['a']) also reads the selected value-array offsets. explain { sql "select id, cardinality(map_arr_col['a']), map_arr_col['a'] from slcp_str_tbl" contains "nested columns" - contains "all access paths: [map_arr_col.KEYS, map_arr_col.VALUES]" - notContains "map_arr_col.VALUES.OFFSET" + contains "map_arr_col.KEYS" + contains "map_arr_col.VALUES" + contains "map_arr_col.VALUES.OFFSET" notContains "predicate access paths:" notContains "type=bigint" } @@ -294,14 +302,13 @@ suite("string_length_column_pruning") { order by id """ - // [map_arr_struct_col, VALUES, *, verified] strips [map_arr_struct_col, VALUES, OFFSET]. - // KEYS (data path) remains in predicateAccessPaths. + // Keep the value-array offset in predicate paths: predicate evaluation needs it for + // cardinality(), and lazy materialization still needs the verified field after filtering. explain { sql "select map_arr_struct_col['a'][1].verified from slcp_str_tbl where cardinality(map_arr_struct_col['a']) > 0" contains "nested columns" - contains "all access paths: [map_arr_struct_col.KEYS, map_arr_struct_col.VALUES.*.verified]" - contains "predicate access paths: [map_arr_struct_col.KEYS]" - notContains "map_arr_struct_col.VALUES.OFFSET" + contains "all access paths: [map_arr_struct_col.KEYS, map_arr_struct_col.VALUES.*.verified, map_arr_struct_col.VALUES.OFFSET]" + contains "predicate access paths: [map_arr_struct_col.KEYS, map_arr_struct_col.VALUES.OFFSET]" notContains "type=bigint" } @@ -311,15 +318,13 @@ suite("string_length_column_pruning") { order by 1 """ - // [map_arr_struct_col, VALUES, *, verified] strips [map_arr_struct_col, VALUES, NULL]. - // KEYS (data path) remains in predicateAccessPaths. + // Predicate keeps the value-array NULL path so IS NULL can read the value-array null map + // during predicate evaluation, while allAccessPaths keeps the data paths used by projection. 
explain { sql "select map_arr_struct_col['a'][1].verified from slcp_str_tbl where map_arr_struct_col['a'] is null" contains "nested columns" - contains "map_arr_struct_col.KEYS" - contains "map_arr_struct_col.VALUES.*.verified" - contains "predicate access paths: [map_arr_struct_col.KEYS]" - notContains "map_arr_struct_col.VALUES.NULL" + contains "all access paths: [map_arr_struct_col.KEYS, map_arr_struct_col.VALUES.*.verified, map_arr_struct_col.VALUES.NULL]" + contains "predicate access paths: [map_arr_struct_col.KEYS, map_arr_struct_col.VALUES.NULL]" } // ─── Non-optimizable cases ────────────────────────────────────────────────── @@ -434,10 +439,13 @@ suite("string_length_column_pruning") { notContains "bigint" } - // length(map_col['a']) + direct map access → OFFSET suppressed, full VALUES needed + // length(map_col['a']) combined with direct map value access: map_col['b'] needs full VALUES data. + // Keep VALUES.OFFSET as well, because length(map_col['a']) still reads the value-string offsets. explain { sql "select length(map_col['a']), map_col['b'] from slcp_str_tbl" - notContains "OFFSET" + contains "map_col.KEYS" + contains "map_col.VALUES" + contains "map_col.VALUES.OFFSET" notContains "bigint" } @@ -521,7 +529,9 @@ suite("string_length_column_pruning") { FROM slcp_struct_root_tbl """ contains "s.arr.*.int_field" - notContains "s.arr.OFFSET" + // cardinality(element_at(s, 'arr')) still needs the nested array offsets, + // while the selected element field needs the int_field data. + contains "s.arr.OFFSET" } explain { @@ -532,12 +542,15 @@ suite("string_length_column_pruning") { """ contains "s.m.KEYS" contains "s.m.VALUES" - notContains "OFFSET" + // The length() on a map value needs value-string offsets even though the query + // also directly reads map values. + contains "s.m.VALUES.OFFSET" } // ─── Map element_at + map_values mixed access ───────────────────────────────── - // [map_col, VALUES] strips [map_col, VALUES, OFFSET]. KEYS is kept for element_at lookup.
+ // Full VALUES data is still needed for map_values(), while the length() access on a + // map value keeps VALUES.OFFSET. KEYS is kept for element_at lookup. order_qt_map_element_with_map_values """ select length(map_col['a']), map_values(map_col)[1] from slcp_str_tbl """ @@ -547,14 +560,17 @@ suite("string_length_column_pruning") { contains "nested columns" contains "KEYS" contains "VALUES" - notContains "OFFSET" + contains "map_col.VALUES.OFFSET" notContains "bigint" } - // [map_col, VALUES] strips [map_col, VALUES, OFFSET]. + // Full VALUES data is needed for map_col['a']; length(map_values(map_col)[1]) keeps + // VALUES.OFFSET for value-string offsets. explain { sql "select length(map_values(map_col)[1]), map_col['a'] from slcp_str_tbl" - notContains "OFFSET" + contains "map_col.KEYS" + contains "map_col.VALUES" + contains "map_col.VALUES.OFFSET" notContains "bigint" }