diff --git a/be/src/common/consts.h b/be/src/common/consts.h index 2ec9ae126796eb..548d5a771a2dd4 100644 --- a/be/src/common/consts.h +++ b/be/src/common/consts.h @@ -27,6 +27,7 @@ const std::string CSV_WITH_NAMES_AND_TYPES = "csv_with_names_and_types"; const std::string BLOCK_TEMP_COLUMN_PREFIX = "__TEMP__"; const std::string BLOCK_TEMP_COLUMN_SCANNER_FILTERED = "__TEMP__scanner_filtered"; const std::string ROWID_COL = "__DORIS_ROWID_COL__"; +const std::string GLOBAL_ROWID_COL = "__DORIS_GLOBAL_ROWID_COL__"; const std::string ROW_STORE_COL = "__DORIS_ROW_STORE_COL__"; const std::string DYNAMIC_COLUMN_NAME = "__DORIS_DYNAMIC_COL__"; const std::string PARTIAL_UPDATE_AUTO_INC_COL = "__PARTIAL_UPDATE_AUTO_INC_COLUMN__"; diff --git a/be/src/exec/rowid_fetcher.cpp b/be/src/exec/rowid_fetcher.cpp index 15355b34f4b5f6..d5f93d95d94af2 100644 --- a/be/src/exec/rowid_fetcher.cpp +++ b/be/src/exec/rowid_fetcher.cpp @@ -55,6 +55,7 @@ #include "olap/utils.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" // ExecEnv +#include "runtime/fragment_mgr.h" // FragmentMgr #include "runtime/runtime_state.h" // RuntimeState #include "runtime/types.h" #include "util/brpc_client_cache.h" // BrpcClientCache @@ -68,6 +69,9 @@ #include "vec/data_types/data_type_factory.hpp" #include "vec/data_types/data_type_struct.h" #include "vec/data_types/serde/data_type_serde.h" +#include "vec/exec/format/orc/vorc_reader.h" +#include "vec/exec/format/parquet/vparquet_reader.h" +#include "vec/exec/scan/file_scanner.h" #include "vec/functions/function_helpers.h" #include "vec/jsonb/serialize.h" @@ -297,15 +301,6 @@ Status RowIDFetcher::fetch(const vectorized::ColumnPtr& column_row_ids, return Status::OK(); } -template -auto scope_timer_run(Func fn, int64_t* cost) -> decltype(fn()) { - MonotonicStopWatch watch; - watch.start(); - auto res = fn(); - *cost += watch.elapsed_time() / 1000 / 1000; - return res; -} - struct IteratorKey { int64_t tablet_id; RowsetId rowset_id; @@ -481,4 +476,346 @@ 
Status RowIdStorageReader::read_by_rowids(const PMultiGetRequest& request, return Status::OK(); } +Status RowIdStorageReader::read_by_rowids(const PMultiGetRequestV2& request, + PMultiGetResponseV2* response) { + if (request.request_block_descs_size()) { + auto tquery_id = ((UniqueId)request.query_id()).to_thrift(); + std::vector result_blocks(request.request_block_descs_size()); + + OlapReaderStatistics stats; + int64_t acquire_tablet_ms = 0; + int64_t acquire_rowsets_ms = 0; + int64_t acquire_segments_ms = 0; + int64_t lookup_row_data_ms = 0; + + int64_t external_init_reader_ms = 0; + int64_t external_get_block_ms = 0; + + // Add counters for different file mapping types + std::unordered_map file_type_counts; + + auto id_file_map = + ExecEnv::GetInstance()->get_id_manager()->get_id_file_map(request.query_id()); + if (!id_file_map) { + return Status::InternalError("Backend:{} id_file_map is null, query_id: {}", + BackendOptions::get_localhost(), print_id(tquery_id)); + } + + for (int i = 0; i < request.request_block_descs_size(); ++i) { + const auto& request_block_desc = request.request_block_descs(i); + if (request_block_desc.row_id_size() >= 1) { + // Since this block belongs to the same table, we only need to take the first type for judgment. 
+ auto first_file_id = request_block_desc.file_id(0); + auto first_file_mapping = id_file_map->get_file_mapping(first_file_id); + if (!first_file_mapping) { + return Status::InternalError( + "Backend:{} file_mapping not found, query_id: {}, file_id: {}", + BackendOptions::get_localhost(), print_id(request.query_id()), + first_file_id); + } + file_type_counts[first_file_mapping->type] += request_block_desc.row_id_size(); + + // prepare slots to build block + std::vector slots; + slots.reserve(request_block_desc.slots_size()); + for (const auto& pslot : request_block_desc.slots()) { + slots.push_back(SlotDescriptor(pslot)); + } + // prepare block char vector shrink for char type + std::vector char_type_idx; + for (int j = 0; j < slots.size(); ++j) { + auto slot = slots[j]; + if (_has_char_type(slot.type())) { + char_type_idx.push_back(j); + } + } + + if (first_file_mapping->type == FileMappingType::INTERNAL) { + RETURN_IF_ERROR(read_batch_doris_format_row( + request_block_desc, id_file_map, slots, tquery_id, result_blocks[i], + stats, &acquire_tablet_ms, &acquire_rowsets_ms, &acquire_segments_ms, + &lookup_row_data_ms)); + } else { + RETURN_IF_ERROR(read_batch_external_row( + request_block_desc, id_file_map, slots, first_file_mapping, tquery_id, + result_blocks[i], &external_init_reader_ms, &external_get_block_ms)); + } + + // after read the block, shrink char type block + result_blocks[i].shrink_char_type_column_suffix_zero(char_type_idx); + } + + [[maybe_unused]] size_t compressed_size = 0; + [[maybe_unused]] size_t uncompressed_size = 0; + int be_exec_version = request.has_be_exec_version() ? 
request.be_exec_version() : 0; + RETURN_IF_ERROR(result_blocks[i].serialize( + be_exec_version, response->add_blocks()->mutable_block(), &uncompressed_size, + &compressed_size, segment_v2::CompressionTypePB::LZ4)); + } + + // Build file type statistics string + std::string file_type_stats; + for (const auto& [type, count] : file_type_counts) { + if (!file_type_stats.empty()) { + file_type_stats += ", "; + } + file_type_stats += fmt::format("{}:{}", type, count); + } + + LOG(INFO) << "Query stats: " + << fmt::format( + "Internal table:" + "hit_cached_pages:{}, total_pages_read:{}, compressed_bytes_read:{}, " + "io_latency:{}ns, uncompressed_bytes_read:{}, bytes_read:{}, " + "acquire_tablet_ms:{}, acquire_rowsets_ms:{}, acquire_segments_ms:{}, " + "lookup_row_data_ms:{}, file_types:[{}]; " + "External table : init_reader_ms:{}, get_block_ms:{}", + stats.cached_pages_num, stats.total_pages_num, + stats.compressed_bytes_read, stats.io_ns, + stats.uncompressed_bytes_read, stats.bytes_read, acquire_tablet_ms, + acquire_rowsets_ms, acquire_segments_ms, lookup_row_data_ms, + file_type_stats, external_init_reader_ms, external_get_block_ms); + } + + if (request.has_gc_id_map() && request.gc_id_map()) { + ExecEnv::GetInstance()->get_id_manager()->remove_id_file_map(request.query_id()); + } + + return Status::OK(); +} + +Status RowIdStorageReader::read_batch_doris_format_row( + const PRequestBlockDesc& request_block_desc, std::shared_ptr id_file_map, + std::vector& slots, const TUniqueId& query_id, + vectorized::Block& result_block, OlapReaderStatistics& stats, int64_t* acquire_tablet_ms, + int64_t* acquire_rowsets_ms, int64_t* acquire_segments_ms, int64_t* lookup_row_data_ms) { + if (result_block.is_empty_column()) [[likely]] { + result_block = vectorized::Block(slots, request_block_desc.row_id_size()); + } + + TabletSchema full_read_schema; + for (const ColumnPB& column_pb : request_block_desc.column_descs()) { + full_read_schema.append_column(TabletColumn(column_pb)); + } + 
std::unordered_map iterator_map; + std::string row_store_buffer; + RowStoreReadStruct row_store_read_struct(row_store_buffer); + if (request_block_desc.fetch_row_store()) { + for (int i = 0; i < request_block_desc.slots_size(); ++i) { + row_store_read_struct.serdes.emplace_back(slots[i].get_data_type_ptr()->get_serde()); + row_store_read_struct.col_uid_to_idx[slots[i].col_unique_id()] = i; + row_store_read_struct.default_values.emplace_back(slots[i].col_default_value()); + } + } + + for (size_t j = 0; j < request_block_desc.row_id_size(); ++j) { + auto file_id = request_block_desc.file_id(j); + auto file_mapping = id_file_map->get_file_mapping(file_id); + if (!file_mapping) { + return Status::InternalError( + "Backend:{} file_mapping not found, query_id: {}, file_id: {}", + BackendOptions::get_localhost(), print_id(query_id), file_id); + } + + RETURN_IF_ERROR(read_doris_format_row( + id_file_map, file_mapping, request_block_desc.row_id(j), slots, full_read_schema, + row_store_read_struct, stats, acquire_tablet_ms, acquire_rowsets_ms, + acquire_segments_ms, lookup_row_data_ms, iterator_map, result_block)); + } + return Status::OK(); +} + +Status RowIdStorageReader::read_batch_external_row(const PRequestBlockDesc& request_block_desc, + std::shared_ptr id_file_map, + std::vector& slots, + std::shared_ptr first_file_mapping, + const TUniqueId& query_id, + vectorized::Block& result_block, + int64_t* init_reader_ms, int64_t* get_block_ms) { + TFileScanRangeParams rpc_scan_params; + TupleDescriptor tuple_desc(request_block_desc.desc(), false); + std::unordered_map colname_to_slot_id; + std::unique_ptr runtime_state = nullptr; + std::unique_ptr runtime_profile; + runtime_profile = std::make_unique("ExternalRowIDFetcher"); + + std::unique_ptr vfile_scanner_ptr = nullptr; + + { + auto& external_info = first_file_mapping->get_external_file_info(); + int plan_node_id = external_info.plan_node_id; + const auto& first_scan_range_desc = external_info.scan_range_desc; + + auto 
query_ctx = ExecEnv::GetInstance()->fragment_mgr()->get_query_ctx(query_id); + const auto* old_scan_params = &(query_ctx->file_scan_range_params_map[plan_node_id]); + rpc_scan_params = *old_scan_params; + + rpc_scan_params.required_slots.clear(); + rpc_scan_params.column_idxs.clear(); + rpc_scan_params.slot_name_to_schema_pos.clear(); + + std::set partition_name_set(first_scan_range_desc.columns_from_path_keys.begin(), + first_scan_range_desc.columns_from_path_keys.end()); + for (auto slot_idx = 0; slot_idx < slots.size(); ++slot_idx) { + auto& slot = slots[slot_idx]; + tuple_desc.add_slot(&slot); + colname_to_slot_id.emplace(slot.col_name(), slot.id()); + TFileScanSlotInfo slot_info; + slot_info.slot_id = slot.id(); + auto column_idx = request_block_desc.column_idxs(slot_idx); + if (partition_name_set.contains(slot.col_name())) { + //This is partition column. + slot_info.is_file_slot = false; + } else { + rpc_scan_params.column_idxs.emplace_back(column_idx); + slot_info.is_file_slot = true; + } + rpc_scan_params.default_value_of_src_slot.emplace(slot.id(), TExpr {}); + rpc_scan_params.required_slots.emplace_back(slot_info); + rpc_scan_params.slot_name_to_schema_pos.emplace(slot.col_name(), column_idx); + } + + if (result_block.is_empty_column()) [[likely]] { + result_block = vectorized::Block(slots, request_block_desc.row_id_size()); + } + + const auto& query_options = query_ctx->get_query_options(); + const auto& query_globals = query_ctx->get_query_globals(); + + /* + * The scan stage needs the information in query_options to generate different behaviors according to the specific variables: + * query_options.hive_parquet_use_column_names, query_options.truncate_char_or_varchar_columns,query_globals.time_zone ... 
+ * + * To ensure the same behavior as the scan stage, I get query_options query_globals from query_ctx, then create runtime_state + * and pass it to vfile_scanner so that the runtime_state information is the same as the scan stage and the behavior is also consistent. + */ + runtime_state = RuntimeState::create_unique(query_id, -1, query_options, query_globals, + ExecEnv::GetInstance(), query_ctx.get()); + + vfile_scanner_ptr = vectorized::FileScanner::create_unique( + runtime_state.get(), runtime_profile.get(), &rpc_scan_params, &colname_to_slot_id, + &tuple_desc); + + RETURN_IF_ERROR(vfile_scanner_ptr->prepare_for_read_one_line(first_scan_range_desc)); + } + + for (size_t j = 0; j < request_block_desc.row_id_size(); ++j) { + auto file_id = request_block_desc.file_id(j); + auto file_mapping = id_file_map->get_file_mapping(file_id); + if (!file_mapping) { + return Status::InternalError( + "Backend:{} file_mapping not found, query_id: {}, file_id: {}", + BackendOptions::get_localhost(), print_id(query_id), file_id); + } + + auto& external_info = file_mapping->get_external_file_info(); + auto& scan_range_desc = external_info.scan_range_desc; + + // Clear to avoid reading iceberg position delete file... + scan_range_desc.table_format_params.iceberg_params = TIcebergFileDesc {}; + + // Clear to avoid reading hive transactional delete delta file... 
+ scan_range_desc.table_format_params.transactional_hive_params = TTransactionalHiveDesc {}; + + RETURN_IF_ERROR(vfile_scanner_ptr->read_one_line_from_range( + scan_range_desc, request_block_desc.row_id(j), &result_block, external_info, + init_reader_ms, get_block_ms)); + } + return Status::OK(); +} + +Status RowIdStorageReader::read_doris_format_row( + const std::shared_ptr& id_file_map, + const std::shared_ptr& file_mapping, int64_t row_id, + std::vector& slots, const TabletSchema& full_read_schema, + RowStoreReadStruct& row_store_read_struct, OlapReaderStatistics& stats, + int64_t* acquire_tablet_ms, int64_t* acquire_rowsets_ms, int64_t* acquire_segments_ms, + int64_t* lookup_row_data_ms, + std::unordered_map& iterator_map, + vectorized::Block& result_block) { + auto [tablet_id, rowset_id, segment_id] = file_mapping->get_doris_format_info(); + BaseTabletSPtr tablet = scope_timer_run( + [&]() { + auto res = ExecEnv::get_tablet(tablet_id); + return !res.has_value() ? nullptr + : std::dynamic_pointer_cast(res.value()); + }, + acquire_tablet_ms); + if (!tablet) { + return Status::InternalError( + "Backend:{} tablet not found, tablet_id: {}, rowset_id: {}, segment_id: {}, " + "row_id: {}", + BackendOptions::get_localhost(), tablet_id, rowset_id.to_string(), segment_id, + row_id); + } + + BetaRowsetSharedPtr rowset = std::static_pointer_cast( + scope_timer_run([&]() { return id_file_map->get_temp_rowset(tablet_id, rowset_id); }, + acquire_rowsets_ms)); + if (!rowset) { + return Status::InternalError( + "Backend:{} rowset_id not found, tablet_id: {}, rowset_id: {}, segment_id: {}, " + "row_id: {}", + BackendOptions::get_localhost(), tablet_id, rowset_id.to_string(), segment_id, + row_id); + } + + SegmentCacheHandle segment_cache; + RETURN_IF_ERROR(scope_timer_run( + [&]() { + return SegmentLoader::instance()->load_segments(rowset, &segment_cache, true); + }, + acquire_segments_ms)); + + auto it = + std::find_if(segment_cache.get_segments().cbegin(), 
segment_cache.get_segments().cend(), + [segment_id](const segment_v2::SegmentSharedPtr& seg) { + return seg->id() == segment_id; + }); + if (it == segment_cache.get_segments().end()) { + return Status::InternalError( + "Backend:{} segment not found, tablet_id: {}, rowset_id: {}, segment_id: {}, " + "row_id: {}", + BackendOptions::get_localhost(), tablet_id, rowset_id.to_string(), segment_id, + row_id); + } + segment_v2::SegmentSharedPtr segment = *it; + + // if row_store_read_struct not empty, means the line we should read from row_store + if (!row_store_read_struct.default_values.empty()) { + CHECK(tablet->tablet_schema()->has_row_store_for_all_columns()); + RowLocation loc(rowset_id, segment->id(), row_id); + row_store_read_struct.row_store_buffer.clear(); + RETURN_IF_ERROR(scope_timer_run( + [&]() { + return tablet->lookup_row_data({}, loc, rowset, stats, + row_store_read_struct.row_store_buffer); + }, + lookup_row_data_ms)); + + vectorized::JsonbSerializeUtil::jsonb_to_block( + row_store_read_struct.serdes, row_store_read_struct.row_store_buffer.data(), + row_store_read_struct.row_store_buffer.size(), row_store_read_struct.col_uid_to_idx, + result_block, row_store_read_struct.default_values, {}); + } else { + for (int x = 0; x < slots.size(); ++x) { + vectorized::MutableColumnPtr column = + result_block.get_by_position(x).column->assume_mutable(); + IteratorKey iterator_key {.tablet_id = tablet_id, + .rowset_id = rowset_id, + .segment_id = segment_id, + .slot_id = slots[x].id()}; + IteratorItem& iterator_item = iterator_map[iterator_key]; + if (iterator_item.segment == nullptr) { + iterator_map[iterator_key].segment = segment; + } + segment = iterator_item.segment; + RETURN_IF_ERROR(segment->seek_and_read_by_rowid(full_read_schema, &slots[x], row_id, + column, stats, iterator_item.iterator)); + } + } + + return Status::OK(); +} + } // namespace doris diff --git a/be/src/exec/rowid_fetcher.h b/be/src/exec/rowid_fetcher.h index 1fc8b02a6796cb..c3cc48db6d4103 
100644 --- a/be/src/exec/rowid_fetcher.h +++ b/be/src/exec/rowid_fetcher.h @@ -27,6 +27,7 @@ #include "common/status.h" #include "exec/tablet_info.h" // DorisNodesInfo +#include "olap/id_manager.h" #include "vec/core/block.h" #include "vec/data_types/data_type.h" @@ -36,6 +37,11 @@ class DorisNodesInfo; class RuntimeState; class TupleDescriptor; +struct FileMapping; +struct IteratorKey; +struct IteratorItem; +struct HashOfIteratorKey; + namespace vectorized { template class ColumnStr; @@ -70,9 +76,52 @@ class RowIDFetcher { FetchOption _fetch_option; }; +struct RowStoreReadStruct { + RowStoreReadStruct(std::string& buffer) : row_store_buffer(buffer) {}; + std::string& row_store_buffer; + vectorized::DataTypeSerDeSPtrs serdes; + std::unordered_map col_uid_to_idx; + std::vector default_values; +}; + class RowIdStorageReader { public: static Status read_by_rowids(const PMultiGetRequest& request, PMultiGetResponse* response); + static Status read_by_rowids(const PMultiGetRequestV2& request, PMultiGetResponseV2* response); + +private: + static Status read_doris_format_row( + const std::shared_ptr& id_file_map, + const std::shared_ptr& file_mapping, int64_t row_id, + std::vector& slots, const TabletSchema& full_read_schema, + RowStoreReadStruct& row_store_read_struct, OlapReaderStatistics& stats, + int64_t* acquire_tablet_ms, int64_t* acquire_rowsets_ms, int64_t* acquire_segments_ms, + int64_t* lookup_row_data_ms, + std::unordered_map& iterator_map, + vectorized::Block& result_block); + + static Status read_batch_doris_format_row( + const PRequestBlockDesc& request_block_desc, std::shared_ptr id_file_map, + std::vector& slots, const TUniqueId& query_id, + vectorized::Block& result_block, OlapReaderStatistics& stats, + int64_t* acquire_tablet_ms, int64_t* acquire_rowsets_ms, int64_t* acquire_segments_ms, + int64_t* lookup_row_data_ms); + + static Status read_batch_external_row(const PRequestBlockDesc& request_block_desc, + std::shared_ptr id_file_map, + std::vector& 
slots, + std::shared_ptr first_file_mapping, + const TUniqueId& query_id, + vectorized::Block& result_block, int64_t* init_reader_ms, + int64_t* get_block_ms); }; +template +auto scope_timer_run(Func fn, int64_t* cost) -> decltype(fn()) { + MonotonicStopWatch watch; + watch.start(); + auto res = fn(); + *cost += watch.elapsed_time() / 1000 / 1000; + return res; +} } // namespace doris diff --git a/be/src/olap/id_manager.h b/be/src/olap/id_manager.h new file mode 100644 index 00000000000000..a311d76ac34a59 --- /dev/null +++ b/be/src/olap/id_manager.h @@ -0,0 +1,264 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/status.h" +#include "olap/olap_common.h" +#include "olap/tablet.h" +#include "olap/tablet_meta.h" + +namespace doris { + +enum class FileMappingType { + INTERNAL, // for doris format file {tablet_id}{rowset_id}{segment_id} + EXTERNAL, // for external table. 
+}; + +struct InternalFileMappingInfo { + int64_t tablet_id; + RowsetId rowset_id; + uint32_t segment_id; + + std::string to_string() const { + std::string value; + value.resize(sizeof(tablet_id) + sizeof(rowset_id) + sizeof(segment_id)); + auto* ptr = value.data(); + + memcpy(ptr, &tablet_id, sizeof(tablet_id)); + ptr += sizeof(tablet_id); + memcpy(ptr, &rowset_id, sizeof(rowset_id)); + ptr += sizeof(rowset_id); + memcpy(ptr, &segment_id, sizeof(segment_id)); + return value; + } +}; + +struct ExternalFileMappingInfo { + /* By recording the plan_node_id in fileMapping, the TFileScanRangeParams used in the scan phase can be found + * from QueryContext according to the plan_node_id. Because there are some important information in + * TFileScanRangeParams (needed when creating hdfs/s3 reader): + * 8: optional THdfsParams hdfs_params; + * 9: optional map properties; + */ + int plan_node_id; + + /* + * Record TFileRangeDesc external_scan_range_desc in fileMapping, usage: + * 1. If the file belongs to a partition, columns_from_path_keys and columns_from_path in TFileRangeDesc are needed when materializing the partition column + * 2. path, file_type, modification_time,compress_type .... used to read the file + * 3. 
TFileFormatType can distinguish whether it is iceberg/hive/hudi/paimon + */ + TFileRangeDesc scan_range_desc; + bool enable_file_meta_cache; + + ExternalFileMappingInfo(int plan_node_id, const TFileRangeDesc& scan_range, + bool file_meta_cache) + : plan_node_id(plan_node_id), + scan_range_desc(scan_range), + enable_file_meta_cache(file_meta_cache) {} + + std::string to_string() const { + std::string value; + value.resize(scan_range_desc.path.size() + sizeof(plan_node_id) + + sizeof(scan_range_desc.start_offset)); + auto* ptr = value.data(); + + memcpy(ptr, &plan_node_id, sizeof(plan_node_id)); + ptr += sizeof(plan_node_id); + memcpy(ptr, &scan_range_desc.start_offset, sizeof(scan_range_desc.start_offset)); + ptr += sizeof(scan_range_desc.start_offset); + memcpy(ptr, scan_range_desc.path.data(), scan_range_desc.path.size()); + return value; + } +}; + +struct FileMapping { + ENABLE_FACTORY_CREATOR(FileMapping); + + FileMappingType type; + std::variant value; + + FileMapping(int64_t tablet_id, RowsetId rowset_id, uint32_t segment_id) + : type(FileMappingType::INTERNAL), + value(std::in_place_type, tablet_id, rowset_id, segment_id) { + } + + FileMapping(int plan_node_id, const TFileRangeDesc& scan_range, bool enable_file_meta_cache) + : type(FileMappingType::EXTERNAL), + value(std::in_place_type, plan_node_id, scan_range, + enable_file_meta_cache) {} + + std::tuple get_doris_format_info() const { + DCHECK(type == FileMappingType::INTERNAL); + auto info = std::get(value); + return std::make_tuple(info.tablet_id, info.rowset_id, info.segment_id); + } + + ExternalFileMappingInfo& get_external_file_info() { + DCHECK(type == FileMappingType::EXTERNAL); + return std::get(value); + } + + static std::string file_mapping_info_to_string( + const std::variant& info) { + return std::visit( + [](const auto& info) -> std::string { + using T = std::decay_t; + + if constexpr (std::is_same_v) { + return info.to_string(); + + } else if constexpr (std::is_same_v) { + return 
info.to_string(); + } + }, + info); + } + + std::string file_mapping_info_to_string() { return file_mapping_info_to_string(value); } +}; + +class IdFileMap { +public: + IdFileMap(uint64_t expired_timestamp) : delayed_expired_timestamp(expired_timestamp) {} + + std::shared_ptr get_file_mapping(uint32_t id) { + std::shared_lock lock(_mtx); + auto it = _id_map.find(id); + if (it == _id_map.end()) { + return nullptr; + } + return it->second; + } + + uint32 get_file_mapping_id(const std::shared_ptr& mapping) { + DCHECK(mapping.get() != nullptr); + auto value = mapping->file_mapping_info_to_string(); + + std::unique_lock lock(_mtx); + auto it = _mapping_to_id.find(value); + if (it != _mapping_to_id.end()) { + return it->second; + } + _id_map[_init_id++] = mapping; + _mapping_to_id[value] = _init_id - 1; + + return _init_id - 1; + } + + void add_temp_rowset(const RowsetSharedPtr& rowset) { + std::unique_lock lock(_mtx); + _temp_rowset_maps[{rowset->rowset_meta()->tablet_id(), rowset->rowset_id()}] = rowset; + } + + RowsetSharedPtr get_temp_rowset(const int64_t tablet_id, const RowsetId& rowset_id) { + std::shared_lock lock(_mtx); + auto it = _temp_rowset_maps.find({tablet_id, rowset_id}); + if (it == _temp_rowset_maps.end()) { + return nullptr; + } + return it->second; + } + + int64_t get_delayed_expired_timestamp() { return delayed_expired_timestamp; } + +private: + std::shared_mutex _mtx; + uint32_t _init_id = 0; + std::unordered_map _mapping_to_id; + std::unordered_map> _id_map; + + // use in Doris Format to keep temp rowsets, preventing them from being deleted by compaction + std::unordered_map, RowsetSharedPtr> _temp_rowset_maps; + uint64_t delayed_expired_timestamp = 0; +}; + +class IdManager { +public: + static constexpr uint8_t ID_VERSION = 0; + + IdManager() = default; + + ~IdManager() { + std::unique_lock lock(_query_to_id_file_map_mtx); + _query_to_id_file_map.clear(); + } + + std::shared_ptr add_id_file_map(const UniqueId& query_id, int timeout) { + 
std::unique_lock lock(_query_to_id_file_map_mtx); + auto it = _query_to_id_file_map.find(query_id); + if (it == _query_to_id_file_map.end()) { + auto id_file_map = std::make_shared(UnixSeconds() + timeout); + _query_to_id_file_map[query_id] = id_file_map; + return id_file_map; + } + return it->second; + } + + void gc_expired_id_file_map(int64_t now) { + std::unique_lock lock(_query_to_id_file_map_mtx); + for (auto it = _query_to_id_file_map.begin(); it != _query_to_id_file_map.end();) { + if (it->second->get_delayed_expired_timestamp() <= now) { + it = _query_to_id_file_map.erase(it); + } else { + ++it; + } + } + } + + void remove_id_file_map(const UniqueId& query_id) { + std::unique_lock lock(_query_to_id_file_map_mtx); + _query_to_id_file_map.erase(query_id); + } + + std::shared_ptr get_id_file_map(const UniqueId& query_id) { + std::shared_lock lock(_query_to_id_file_map_mtx); + auto it = _query_to_id_file_map.find(query_id); + if (it == _query_to_id_file_map.end()) { + return nullptr; + } + return it->second; + } + +private: + DISALLOW_COPY_AND_ASSIGN(IdManager); + + phmap::flat_hash_map> _query_to_id_file_map; + std::shared_mutex _query_to_id_file_map_mtx; +}; + +} // namespace doris \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index b55e0d2c11482c..b8b81278b2ccd6 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -1775,4 +1775,31 @@ Status DefaultNestedColumnIterator::read_by_rowids(const rowid_t* rowids, const return Status::OK(); } +Status RowIdColumnIteratorV2::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, + bool* has_null) { + auto* string_column = assert_cast(dst.get()); + + for (size_t i = 0; i < *n; ++i) { + uint32_t row_id = _current_rowid + i; + GlobalRowLoacationV2 location(_version, _backend_id, _file_id, row_id); + string_column->insert_data(reinterpret_cast(&location), + 
sizeof(GlobalRowLoacationV2)); + } + _current_rowid += *n; + return Status::OK(); +} + +Status RowIdColumnIteratorV2::read_by_rowids(const rowid_t* rowids, const size_t count, + vectorized::MutableColumnPtr& dst) { + auto* string_column = assert_cast(dst.get()); + + for (size_t i = 0; i < count; ++i) { + uint32_t row_id = rowids[i]; + GlobalRowLoacationV2 location(_version, _backend_id, _file_id, row_id); + string_column->insert_data(reinterpret_cast(&location), + sizeof(GlobalRowLoacationV2)); + } + return Status::OK(); +} + } // namespace doris::segment_v2 diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index c2f3d3822d2efc..406b1bb6122c52 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -634,6 +634,41 @@ class RowIdColumnIterator : public ColumnIterator { int32_t _segment_id = 0; }; +// Add new RowIdColumnIteratorV2 +class RowIdColumnIteratorV2 : public ColumnIterator { +public: + RowIdColumnIteratorV2(uint8_t version, int64_t backend_id, uint32_t file_id) + : _version(version), _backend_id(backend_id), _file_id(file_id) {} + + Status seek_to_first() override { + _current_rowid = 0; + return Status::OK(); + } + + Status seek_to_ordinal(ordinal_t ord_idx) override { + _current_rowid = ord_idx; + return Status::OK(); + } + + Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) { + bool has_null; + return next_batch(n, dst, &has_null); + } + + Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) override; + + Status read_by_rowids(const rowid_t* rowids, const size_t count, + vectorized::MutableColumnPtr& dst) override; + + ordinal_t get_current_ordinal() const override { return _current_rowid; } + +private: + uint32_t _current_rowid = 0; + uint8_t _version; + int64_t _backend_id; + uint32_t _file_id; +}; + class VariantRootColumnIterator : public ColumnIterator { public: VariantRootColumnIterator() = 
delete; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index aa6433f831855d..8d7fa96d965fd0 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -43,6 +43,7 @@ #include "olap/bloom_filter_predicate.h" #include "olap/column_predicate.h" #include "olap/field.h" +#include "olap/id_manager.h" #include "olap/iterators.h" #include "olap/like_column_predicate.h" #include "olap/match_predicate.h" @@ -1046,6 +1047,16 @@ Status SegmentIterator::_init_return_column_iterators() { new RowIdColumnIterator(_opts.tablet_id, _opts.rowset_id, _segment->id())); continue; } + + if (_schema->column(cid)->name().starts_with(BeConsts::GLOBAL_ROWID_COL)) { + auto& id_file_map = _opts.runtime_state->get_id_file_map(); + uint32_t file_id = id_file_map->get_file_mapping_id(std::make_shared( + _opts.tablet_id, _opts.rowset_id, _segment->id())); + _column_iterators[cid].reset(new RowIdColumnIteratorV2( + IdManager::ID_VERSION, BackendOptions::get_backend_id(), file_id)); + continue; + } + std::set del_cond_id_set; _opts.delete_condition_predicates->get_all_column_ids(del_cond_id_set); std::vector tmp_is_pred_column; diff --git a/be/src/olap/schema.h b/be/src/olap/schema.h index 6414db4153a02b..b71985111208e5 100644 --- a/be/src/olap/schema.h +++ b/be/src/olap/schema.h @@ -68,7 +68,8 @@ class Schema { if (column.is_key()) { ++num_key_columns; } - if (column.name() == BeConsts::ROWID_COL) { + if (column.name() == BeConsts::ROWID_COL || + column.name().starts_with(BeConsts::GLOBAL_ROWID_COL)) { _rowid_col_idx = cid; } if (column.name() == VERSION_COL) { @@ -94,7 +95,8 @@ class Schema { if (columns[i]->name() == DELETE_SIGN) { _delete_sign_idx = i; } - if (columns[i]->name() == BeConsts::ROWID_COL) { + if (columns[i]->name() == BeConsts::ROWID_COL || + columns[i]->name().starts_with(BeConsts::GLOBAL_ROWID_COL)) { _rowid_col_idx = i; } if (columns[i]->name() 
== VERSION_COL) { diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index 237d69ff9fd7fa..875c07025a6711 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -52,6 +52,7 @@ #include "io/fs/local_file_system.h" #include "olap/binlog.h" #include "olap/data_dir.h" +#include "olap/id_manager.h" #include "olap/memtable_flush_executor.h" #include "olap/olap_common.h" #include "olap/olap_define.h" @@ -1497,6 +1498,9 @@ void BaseStorageEngine::_evict_querying_rowset() { } } } + + uint64_t now = UnixSeconds(); + ExecEnv::GetInstance()->get_id_manager()->gc_expired_id_file_map(now); } bool BaseStorageEngine::_should_delay_large_task() { diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index c79d962e6558db..72d7b5e03396b9 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -396,6 +396,15 @@ class TabletSchema : public MetadataAdder { long row_store_page_size() const { return _row_store_page_size; } void set_storage_page_size(long storage_page_size) { _storage_page_size = storage_page_size; } long storage_page_size() const { return _storage_page_size; } + bool has_global_row_id() const { + for (auto [col_name, _] : _field_name_to_index) { + if (col_name.start_with(StringRef(BeConsts::GLOBAL_ROWID_COL.data(), + BeConsts::GLOBAL_ROWID_COL.size()))) { + return true; + } + } + return false; + } const std::vector inverted_indexes() const { std::vector inverted_indexes; diff --git a/be/src/olap/utils.h b/be/src/olap/utils.h index c163aad11488b5..242e39c2c54f6b 100644 --- a/be/src/olap/utils.h +++ b/be/src/olap/utils.h @@ -263,4 +263,15 @@ struct GlobalRowLoacation { } }; +struct GlobalRowLoacationV2 { + GlobalRowLoacationV2(uint8_t ver, uint64_t bid, uint32_t fid, uint32_t rid) + : version(ver), backend_id(bid), file_id(fid), row_id(rid) {} + uint8_t version; + int64_t backend_id; + uint32_t file_id; + uint32_t row_id; + + auto operator<=>(const GlobalRowLoacationV2&) const = 
default; +}; + } // namespace doris diff --git a/be/src/pipeline/dependency.cpp b/be/src/pipeline/dependency.cpp index 8c3d09c56e3ac6..f957a5a7c73a03 100644 --- a/be/src/pipeline/dependency.cpp +++ b/be/src/pipeline/dependency.cpp @@ -27,9 +27,11 @@ #include "runtime/exec_env.h" #include "runtime/memory/mem_tracker.h" #include "runtime_filter/runtime_filter_consumer.h" +#include "util/brpc_client_cache.h" #include "vec/exprs/vectorized_agg_fn.h" #include "vec/exprs/vslot_ref.h" #include "vec/spill/spill_stream_manager.h" +#include "vec/utils/util.hpp" namespace doris::pipeline { #include "common/compile_check_begin.h" @@ -471,4 +473,194 @@ void AggSharedState::refresh_top_limit(size_t row_id, limit_columns_min = limit_heap.top()._row_id; } +Status MaterializationSharedState::merge_multi_response(vectorized::Block* block) { + std::map> _block_maps; + for (int i = 0; i < block_order_results.size(); ++i) { + for (auto& [backend_id, rpc_struct] : rpc_struct_map) { + vectorized::Block partial_block; + RETURN_IF_ERROR( + partial_block.deserialize(rpc_struct.callback->response_->blocks(i).block())); + + if (!partial_block.is_empty_column()) { + _block_maps[backend_id] = std::make_pair(std::move(partial_block), 0); + } + } + + for (int j = 0; j < block_order_results[i].size(); ++j) { + auto backend_id = block_order_results[i][j]; + if (backend_id) { + auto& source_block_rows = _block_maps[backend_id]; + DCHECK(source_block_rows.second < source_block_rows.first.rows()); + for (int k = 0; k < response_blocks[i].columns(); ++k) { + response_blocks[i].get_column_by_position(k)->insert_from( + *source_block_rows.first.get_by_position(k).column, + source_block_rows.second); + } + source_block_rows.second++; + } else { + for (int k = 0; k < response_blocks[i].columns(); ++k) { + response_blocks[i].get_column_by_position(k)->insert_default(); + } + } + } + } + + // clear request/response + for (auto& [_, rpc_struct] : rpc_struct_map) { + for (int i = 0; i < 
rpc_struct.request.request_block_descs_size(); ++i) { + rpc_struct.request.mutable_request_block_descs(i)->clear_row_id(); + rpc_struct.request.mutable_request_block_descs(i)->clear_file_id(); + } + } + + for (int i = 0, j = 0, rowid_to_block_loc = rowid_locs[j]; i < origin_block.columns(); i++) { + if (i != rowid_to_block_loc) { + block->insert(origin_block.get_by_position(i)); + } else { + auto response_block = response_blocks[j].to_block(); + for (int k = 0; k < response_block.columns(); k++) { + auto& data = response_block.get_by_position(k); + response_blocks[j].mutable_columns()[k] = data.column->clone_empty(); + block->insert(data); + } + if (++j < rowid_locs.size()) { + rowid_to_block_loc = rowid_locs[j]; + } + } + } + origin_block.clear(); + + return Status::OK(); +} + +Dependency* MaterializationSharedState::create_source_dependency(int operator_id, int node_id, + const std::string& name) { + auto dep = + std::make_shared(operator_id, node_id, name + "_DEPENDENCY"); + dep->set_shared_state(this); + // just block source wait for add the counter in sink + dep->add(0); + + source_deps.push_back(dep); + return source_deps.back().get(); +} + +Status MaterializationSharedState::create_muiltget_result(const vectorized::Columns& columns, + bool eos, bool gc_id_map) { + const auto rows = columns.empty() ? 
0 : columns[0]->size(); + block_order_results.resize(columns.size()); + + for (int i = 0; i < columns.size(); ++i) { + const uint8_t* null_map = nullptr; + const vectorized::ColumnString* column_rowid = nullptr; + auto& column = columns[i]; + + if (auto column_ptr = check_and_get_column(*column)) { + null_map = column_ptr->get_null_map_data().data(); + column_rowid = assert_cast( + column_ptr->get_nested_column_ptr().get()); + } else { + column_rowid = assert_cast(column.get()); + } + + auto& block_order = block_order_results[i]; + block_order.resize(rows); + + for (int j = 0; j < rows; ++j) { + if (!null_map || !null_map[j]) { + DCHECK(column_rowid->get_data_at(j).size == sizeof(GlobalRowLoacationV2)); + GlobalRowLoacationV2 row_location = + *((GlobalRowLoacationV2*)column_rowid->get_data_at(j).data); + auto rpc_struct = rpc_struct_map.find(row_location.backend_id); + if (UNLIKELY(rpc_struct == rpc_struct_map.end())) { + return Status::InternalError( + "MaterializationSinkOperatorX failed to find rpc_struct, backend_id={}", + row_location.backend_id); + } + rpc_struct->second.request.mutable_request_block_descs(i)->add_row_id( + row_location.row_id); + rpc_struct->second.request.mutable_request_block_descs(i)->add_file_id( + row_location.file_id); + block_order[j] = row_location.backend_id; + } else { + block_order[j] = 0; + } + } + } + + if (eos && gc_id_map) { + for (auto& [_, rpc_struct] : rpc_struct_map) { + rpc_struct.request.set_gc_id_map(true); + } + } + last_block = eos; + need_merge_block = rows > 0; + + return Status::OK(); +} + +Status MaterializationSharedState::init_multi_requests( + const TMaterializationNode& materialization_node, RuntimeState* state) { + rpc_struct_inited = true; + PMultiGetRequestV2 multi_get_request; + // Initialize the base struct of PMultiGetRequestV2 + multi_get_request.set_be_exec_version(state->be_exec_version()); + multi_get_request.set_wg_id(state->get_query_ctx()->workload_group()->id()); + auto query_id = 
multi_get_request.mutable_query_id(); + query_id->set_hi(state->query_id().hi); + query_id->set_lo(state->query_id().lo); + DCHECK_EQ(materialization_node.column_descs_lists.size(), + materialization_node.slot_locs_lists.size()); + + const auto& tuple_desc = + state->desc_tbl().get_tuple_descriptor(materialization_node.intermediate_tuple_id); + const auto& slots = tuple_desc->slots(); + response_blocks = + std::vector(materialization_node.column_descs_lists.size()); + + for (int i = 0; i < materialization_node.column_descs_lists.size(); ++i) { + auto request_block_desc = multi_get_request.add_request_block_descs(); + request_block_desc->set_fetch_row_store(materialization_node.fetch_row_stores[i]); + // Initialize the column_descs and slot_locs + auto& column_descs = materialization_node.column_descs_lists[i]; + for (auto& column_desc_item : column_descs) { + TabletColumn(column_desc_item).to_schema_pb(request_block_desc->add_column_descs()); + } + + auto& slot_locs = materialization_node.slot_locs_lists[i]; + tuple_desc->to_protobuf(request_block_desc->mutable_desc()); + + auto& column_idxs = materialization_node.column_idxs_lists[i]; + for (auto idx : column_idxs) { + request_block_desc->add_column_idxs(idx); + } + + std::vector slots_res; + for (auto& slot_loc_item : slot_locs) { + slots[slot_loc_item]->to_protobuf(request_block_desc->add_slots()); + slots_res.emplace_back(slots[slot_loc_item]); + } + response_blocks[i] = vectorized::MutableBlock(vectorized::Block(slots_res, 10)); + } + + // Initialize the stubs and requests for each BE + for (const auto& node_info : materialization_node.nodes_info.nodes) { + auto client = ExecEnv::GetInstance()->brpc_internal_client_cache()->get_client( + node_info.host, node_info.async_internal_port); + if (!client) { + LOG(WARNING) << "Get rpc stub failed, host=" << node_info.host + << ", port=" << node_info.async_internal_port; + return Status::InternalError("RowIDFetcher failed to init rpc client, host={}, port={}", + 
node_info.host, node_info.async_internal_port); + } + rpc_struct_map.emplace(node_info.id, FetchRpcStruct {.stub = std::move(client), + .request = multi_get_request, + .callback = nullptr}); + } + // add be_num ad count finish counter for source dependency + ((CountedFinishDependency*)source_deps.back().get())->add((int)rpc_struct_map.size()); + + return Status::OK(); +} + } // namespace doris::pipeline diff --git a/be/src/pipeline/dependency.h b/be/src/pipeline/dependency.h index 1935e03e24eb2d..ef4c7a1dfe554c 100644 --- a/be/src/pipeline/dependency.h +++ b/be/src/pipeline/dependency.h @@ -29,11 +29,14 @@ #include "common/config.h" #include "common/logging.h" +#include "gen_cpp/internal_service.pb.h" +#include "gutil/integral_types.h" #include "pipeline/common/agg_utils.h" #include "pipeline/common/join_utils.h" #include "pipeline/common/set_utils.h" #include "pipeline/exec/data_queue.h" #include "pipeline/exec/join/process_hash_table_probe.h" +#include "util/brpc_closure.h" #include "util/stack_util.h" #include "vec/common/sort/partition_sorter.h" #include "vec/common/sort/sorter.h" @@ -82,9 +85,11 @@ struct BasicSharedState { virtual ~BasicSharedState() = default; - Dependency* create_source_dependency(int operator_id, int node_id, const std::string& name); void create_source_dependencies(int num_sources, int operator_id, int node_id, const std::string& name); + virtual Dependency* create_source_dependency(int operator_id, int node_id, + const std::string& name); + Dependency* create_sink_dependency(int dest_id, int node_id, const std::string& name); std::vector get_dep_by_channel_id(int channel_id) { DCHECK_LT(channel_id, source_deps.size()); @@ -176,12 +181,12 @@ class CountedFinishDependency final : public Dependency { CountedFinishDependency(int id, int node_id, std::string name) : Dependency(id, node_id, std::move(name), true) {} - void add() { + void add(uint32_t count = 1) { std::unique_lock l(_mtx); if (!_counter) { block(); } - _counter++; + _counter += 
count; } void sub() { @@ -543,8 +548,8 @@ struct UnionSharedState : public BasicSharedState { const int _child_count; }; -struct CacheSharedState : public BasicSharedState { - ENABLE_FACTORY_CREATOR(CacheSharedState) +struct DataQueueSharedState : public BasicSharedState { + ENABLE_FACTORY_CREATOR(DataQueueSharedState) public: DataQueue data_queue; }; @@ -802,5 +807,100 @@ struct LocalExchangeSharedState : public BasicSharedState { } }; +//struct LocalMergeExchangeSharedState : public LocalExchangeSharedState { +// ENABLE_FACTORY_CREATOR(LocalMergeExchangeSharedState); +// LocalMergeExchangeSharedState(int num_instances) +// : LocalExchangeSharedState(num_instances), +// _each_queue_limit(config::local_exchange_buffer_mem_limit / num_instances) {} +// +// void create_dependencies(int local_exchange_id) override { +// sink_deps.resize(source_deps.size()); +// for (size_t i = 0; i < source_deps.size(); i++) { +// source_deps[i] = +// std::make_shared(local_exchange_id, local_exchange_id, +// "LOCAL_MERGE_EXCHANGE_OPERATOR_DEPENDENCY"); +// source_deps[i]->set_shared_state(this); +// sink_deps[i] = std::make_shared( +// local_exchange_id, local_exchange_id, +// "LOCAL_MERGE_EXCHANGE_OPERATOR_SINK_DEPENDENCY", true); +// sink_deps[i]->set_shared_state(this); +// } +// } +// +// void sub_total_mem_usage(size_t delta) override { mem_usage.fetch_sub(delta); } +// void add_total_mem_usage(size_t delta) override { mem_usage.fetch_add(delta); } +// +// void add_mem_usage(int channel_id, size_t delta) override { +// LocalExchangeSharedState::add_mem_usage(channel_id, delta); +// if (mem_counters[channel_id]->value() > _each_queue_limit.load()) { +// sink_deps[channel_id]->block(); +// } +// } +// +// void sub_mem_usage(int channel_id, size_t delta) override { +// LocalExchangeSharedState::sub_mem_usage(channel_id, delta); +// if (mem_counters[channel_id]->value() <= _each_queue_limit.load()) { +// sink_deps[channel_id]->set_ready(); +// } +// } +// +// void 
set_low_memory_mode(RuntimeState* state) override { +// _buffer_mem_limit = std::min(config::local_exchange_buffer_mem_limit, +// state->low_memory_mode_buffer_limit()); +// _each_queue_limit = std::max(64 * 1024, _buffer_mem_limit / source_deps.size()); +// } +// +// Dependency* get_sink_dep_by_channel_id(int channel_id) override { +// return sink_deps[channel_id].get(); +// } +// +// std::vector get_dep_by_channel_id(int channel_id) override { +// return source_deps; +// } +// +//private: +// std::atomic_int64_t _each_queue_limit; +//}; + +//class QueryGlobalDependency final : public Dependency { +// ENABLE_FACTORY_CREATOR(QueryGlobalDependency); +// QueryGlobalDependency(std::string name, bool ready = false) : Dependency(-1, -1, name, ready) {} +// ~QueryGlobalDependency() override = default; +// Dependency* is_blocked_by(PipelineTask* task = nullptr) override; +//}; + +struct FetchRpcStruct { + std::shared_ptr stub; + PMultiGetRequestV2 request; + std::shared_ptr> callback; + MonotonicStopWatch rpc_timer; +}; + +struct MaterializationSharedState : public BasicSharedState { + ENABLE_FACTORY_CREATOR(MaterializationSharedState) +public: + MaterializationSharedState() = default; + + Status init_multi_requests(const TMaterializationNode& tnode, RuntimeState* state); + Status create_muiltget_result(const vectorized::Columns& columns, bool eos, bool gc_id_map); + Status merge_multi_response(vectorized::Block* block); + + Dependency* create_source_dependency(int operator_id, int node_id, + const std::string& name) override; + + bool rpc_struct_inited = false; + Status rpc_status = Status::OK(); + bool last_block = false; + // empty materialization sink block not need to merge block + bool need_merge_block = true; + vectorized::Block origin_block; + // The rowid column of the origin block. should be replaced by the column of the result block. 
+ std::vector rowid_locs; + std::vector response_blocks; + std::map rpc_struct_map; + // Register each line in which block to ensure the order of the result. + // Zero means NULL value. + std::vector> block_order_results; +}; #include "common/compile_check_end.h" } // namespace doris::pipeline diff --git a/be/src/pipeline/exec/cache_sink_operator.h b/be/src/pipeline/exec/cache_sink_operator.h index e77badbb9acec5..117e5864e78f52 100644 --- a/be/src/pipeline/exec/cache_sink_operator.h +++ b/be/src/pipeline/exec/cache_sink_operator.h @@ -33,14 +33,14 @@ namespace pipeline { class DataQueue; class CacheSinkOperatorX; -class CacheSinkLocalState final : public PipelineXSinkLocalState { +class CacheSinkLocalState final : public PipelineXSinkLocalState { public: ENABLE_FACTORY_CREATOR(CacheSinkLocalState); CacheSinkLocalState(DataSinkOperatorXBase* parent, RuntimeState* state) : Base(parent, state) {} Status init(RuntimeState* state, LocalSinkStateInfo& info) override; Status open(RuntimeState* state) override; friend class CacheSinkOperatorX; - using Base = PipelineXSinkLocalState; + using Base = PipelineXSinkLocalState; using Parent = CacheSinkOperatorX; }; @@ -62,7 +62,7 @@ class CacheSinkOperatorX final : public DataSinkOperatorX { Status sink(RuntimeState* state, vectorized::Block* in_block, bool eos) override; std::shared_ptr create_shared_state() const override { - std::shared_ptr ss = std::make_shared(); + std::shared_ptr ss = std::make_shared(); ss->id = operator_id(); for (auto& dest : dests_id()) { ss->related_op_ids.insert(dest); diff --git a/be/src/pipeline/exec/cache_source_operator.cpp b/be/src/pipeline/exec/cache_source_operator.cpp index 7efe0e7588fe15..04c371c4335afc 100644 --- a/be/src/pipeline/exec/cache_source_operator.cpp +++ b/be/src/pipeline/exec/cache_source_operator.cpp @@ -34,7 +34,7 @@ Status CacheSourceLocalState::init(RuntimeState* state, LocalStateInfo& info) { RETURN_IF_ERROR(Base::init(state, info)); SCOPED_TIMER(exec_time_counter()); 
SCOPED_TIMER(_init_timer); - ((CacheSharedState*)_dependency->shared_state()) + ((DataQueueSharedState*)_dependency->shared_state()) ->data_queue.set_source_dependency(_shared_state->source_deps.front()); const auto& scan_ranges = info.scan_ranges; bool hit_cache = false; diff --git a/be/src/pipeline/exec/cache_source_operator.h b/be/src/pipeline/exec/cache_source_operator.h index 947fd00852be4f..49f0c376c47cbe 100644 --- a/be/src/pipeline/exec/cache_source_operator.h +++ b/be/src/pipeline/exec/cache_source_operator.h @@ -36,10 +36,10 @@ namespace pipeline { class DataQueue; class CacheSourceOperatorX; -class CacheSourceLocalState final : public PipelineXLocalState { +class CacheSourceLocalState final : public PipelineXLocalState { public: ENABLE_FACTORY_CREATOR(CacheSourceLocalState); - using Base = PipelineXLocalState; + using Base = PipelineXLocalState; using Parent = CacheSourceOperatorX; CacheSourceLocalState(RuntimeState* state, OperatorXBase* parent) : Base(state, parent) {}; diff --git a/be/src/pipeline/exec/materialization_sink_operator.cpp b/be/src/pipeline/exec/materialization_sink_operator.cpp new file mode 100644 index 00000000000000..f5f58d820522f0 --- /dev/null +++ b/be/src/pipeline/exec/materialization_sink_operator.cpp @@ -0,0 +1,157 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "pipeline/exec/materialization_sink_operator.h" + +#include +#include +#include + +#include + +#include "common/status.h" +#include "pipeline/exec/operator.h" +#include "util/brpc_client_cache.h" +#include "util/brpc_closure.h" +#include "vec/columns/column.h" +#include "vec/core/block.h" + +namespace doris { +namespace pipeline { + +Status MaterializationSinkOperatorX::init(const doris::TPlanNode& tnode, + doris::RuntimeState* state) { + RETURN_IF_ERROR(DataSinkOperatorX::init(tnode, state)); + DCHECK(tnode.__isset.materialization_node); + _materialization_node = tnode.materialization_node; + _gc_id_map = tnode.materialization_node.gc_id_map; + // Create result_expr_ctx_lists_ from thrift exprs. + auto& fetch_expr_lists = tnode.materialization_node.fetch_expr_lists; + RETURN_IF_ERROR(vectorized::VExpr::create_expr_trees(fetch_expr_lists, _rowid_exprs)); + return Status::OK(); +} + +Status MaterializationSinkOperatorX::prepare(RuntimeState* state) { + RETURN_IF_ERROR(vectorized::VExpr::prepare(_rowid_exprs, state, _child->row_desc())); + RETURN_IF_ERROR(vectorized::VExpr::open(_rowid_exprs, state)); + return Status::OK(); +} + +template +class MaterializationCallback : public ::doris::DummyBrpcCallback { + ENABLE_FACTORY_CREATOR(MaterializationCallback); + +public: + MaterializationCallback(std::weak_ptr tast_exec_ctx, + MaterializationSharedState* shared_state, MonotonicStopWatch& rpc_timer, + std::string&& query_id) + : _tast_exec_ctx(std::move(tast_exec_ctx)), + _shared_state(shared_state), + _rpc_timer(rpc_timer), + _query_id(std::move(query_id)) {} + + ~MaterializationCallback() override = default; + MaterializationCallback(const MaterializationCallback& other) = delete; + MaterializationCallback& operator=(const MaterializationCallback& other) = delete; + + void call() noexcept override { + auto tast_exec_ctx = _tast_exec_ctx.lock(); + if 
(!tast_exec_ctx) { + return; + } + + _rpc_timer.stop(); + if (::doris::DummyBrpcCallback::cntl_->Failed()) { + std::string err = fmt::format( + "failed to send brpc when exchange, error={}, error_text={}, client: {}, " + "latency = {}", + berror(::doris::DummyBrpcCallback::cntl_->ErrorCode()), + ::doris::DummyBrpcCallback::cntl_->ErrorText(), + BackendOptions::get_localhost(), + ::doris::DummyBrpcCallback::cntl_->latency_us()); + _shared_state->rpc_status = Status::InternalError(err); + } else { + LOG(INFO) << "happen lee call before query id:" << _query_id; + _shared_state->rpc_status = + Status::create(doris::DummyBrpcCallback::response_->status()); + LOG(INFO) << "happen lee call after query id:" << _query_id; + } + ((CountedFinishDependency*)_shared_state->source_deps.back().get())->sub(); + } + +private: + std::weak_ptr _tast_exec_ctx; + MaterializationSharedState* _shared_state; + MonotonicStopWatch& _rpc_timer; + std::string _query_id; +}; + +Status MaterializationSinkOperatorX::sink(RuntimeState* state, vectorized::Block* in_block, + bool eos) { + auto& local_state = get_local_state(state); + SCOPED_TIMER(local_state.exec_time_counter()); + COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)in_block->rows()); + if (!local_state._shared_state->rpc_struct_inited) { + RETURN_IF_ERROR( + local_state._shared_state->init_multi_requests(_materialization_node, state)); + } + + if (in_block->rows() > 0 || eos) { + // block the pipeline wait the rpc response + if (!eos) { + local_state._shared_state->sink_deps.back()->block(); + } + // execute the rowid exprs + vectorized::Columns columns; + if (in_block->rows() != 0) { + local_state._shared_state->rowid_locs.resize(_rowid_exprs.size()); + for (int i = 0; i < _rowid_exprs.size(); ++i) { + auto& rowid_expr = _rowid_exprs[i]; + RETURN_IF_ERROR( + rowid_expr->execute(in_block, &local_state._shared_state->rowid_locs[i])); + columns.emplace_back( + 
in_block->get_by_position(local_state._shared_state->rowid_locs[i]).column); + } + local_state._shared_state->origin_block.swap(*in_block); + } + RETURN_IF_ERROR( + local_state._shared_state->create_muiltget_result(columns, eos, _gc_id_map)); + + for (auto& [backend_id, rpc_struct] : local_state._shared_state->rpc_struct_map) { + auto callback = MaterializationCallback::create_shared( + state->get_task_execution_context(), local_state._shared_state, + rpc_struct.rpc_timer, print_id(rpc_struct.request.query_id())); + callback->cntl_->set_timeout_ms(config::fetch_rpc_timeout_seconds * 1000); + auto closure = + AutoReleaseClosure>:: + create_unique( + std::make_shared(), callback, state->get_query_ctx_weak(), + "Materialization Sink node id:" + std::to_string(node_id()) + + " target_backend_id:" + std::to_string(backend_id)); + // send brpc request + rpc_struct.callback = callback; + rpc_struct.rpc_timer.start(); + rpc_struct.stub->multiget_data_v2(callback->cntl_.get(), &rpc_struct.request, + callback->response_.get(), closure.release()); + } + } + + return Status::OK(); +} + +} // namespace pipeline +} // namespace doris \ No newline at end of file diff --git a/be/src/pipeline/exec/materialization_sink_operator.h b/be/src/pipeline/exec/materialization_sink_operator.h new file mode 100644 index 00000000000000..813d12e017dbfb --- /dev/null +++ b/be/src/pipeline/exec/materialization_sink_operator.h @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "common/status.h" +#include "operator.h" +#include "vec/core/block.h" + +namespace doris { +#include "common/compile_check_begin.h" +class RuntimeState; + +namespace pipeline { + +class MaterializationSinkOperatorX; +class MaterializationSinkLocalState final + : public PipelineXSinkLocalState { +public: + ENABLE_FACTORY_CREATOR(MaterializationSinkLocalState); + MaterializationSinkLocalState(DataSinkOperatorXBase* parent, RuntimeState* state) + : Base(parent, state) {} + +private: + friend class MaterializationSinkOperatorX; + using Base = PipelineXSinkLocalState; + using Parent = MaterializationSinkOperatorX; +}; + +class MaterializationSinkOperatorX final : public DataSinkOperatorX { +public: + using Base = DataSinkOperatorX; + + friend class MaterializationSinkLocalState; + MaterializationSinkOperatorX(int child_id, int sink_id, ObjectPool* pool, + const TPlanNode& tnode) + : Base(sink_id, tnode.node_id, child_id) { + _name = "MATERIALIZATION_SINK_OPERATOR"; + } + ~MaterializationSinkOperatorX() override = default; + + Status init(const TPlanNode& tnode, RuntimeState* state) override; + Status prepare(RuntimeState* state) override; + Status sink(RuntimeState* state, vectorized::Block* in_block, bool eos) override; + +private: + // Materialized slot by this node. 
The i-th result expr list refers to a slot of RowId + TMaterializationNode _materialization_node; + vectorized::VExprContextSPtrs _rowid_exprs; + bool _gc_id_map = false; +}; + +} // namespace pipeline +#include "common/compile_check_end.h" +} // namespace doris \ No newline at end of file diff --git a/be/src/pipeline/exec/materialization_source_operator.cpp b/be/src/pipeline/exec/materialization_source_operator.cpp new file mode 100644 index 00000000000000..e9eb7a02d21243 --- /dev/null +++ b/be/src/pipeline/exec/materialization_source_operator.cpp @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "pipeline/exec/materialization_source_operator.h" + +#include + +#include "common/status.h" +#include "vec/core/block.h" + +namespace doris::pipeline { + +Status MaterializationSourceOperatorX::get_block(RuntimeState* state, vectorized::Block* block, + bool* eos) { + auto& local_state = get_local_state(state); + SCOPED_TIMER(local_state.exec_time_counter()); + if (!local_state._shared_state->rpc_status.ok()) { + return local_state._shared_state->rpc_status; + } + + // clear origin block, do merge response to build a ret block + block->clear(); + if (local_state._shared_state->need_merge_block) { + SCOPED_TIMER(local_state._merge_response_timer); + RETURN_IF_ERROR(local_state._shared_state->merge_multi_response(block)); + } + *eos = local_state._shared_state->last_block; + + if (!*eos) { + local_state._shared_state->sink_deps.back()->set_ready(); + + ((CountedFinishDependency*)(local_state._shared_state->source_deps.back().get())) + ->add(local_state._shared_state->rpc_struct_map.size()); + } else { + uint64_t max_rpc_time = 0; + for (auto& [_, rpc_struct] : local_state._shared_state->rpc_struct_map) { + max_rpc_time = std::max(max_rpc_time, rpc_struct.rpc_timer.elapsed_time()); + } + COUNTER_SET(local_state._max_rpc_timer, (int64_t)max_rpc_time); + } + + return Status::OK(); +} + +} // namespace doris::pipeline \ No newline at end of file diff --git a/be/src/pipeline/exec/materialization_source_operator.h b/be/src/pipeline/exec/materialization_source_operator.h new file mode 100644 index 00000000000000..0c6a8b91047539 --- /dev/null +++ b/be/src/pipeline/exec/materialization_source_operator.h @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "common/status.h" +#include "operator.h" +#include "vec/core/block.h" + +namespace doris { +#include "common/compile_check_begin.h" +class RuntimeState; + +namespace pipeline { + +class MaterializationSourceOperatorX; +class MaterializationSourceLocalState final + : public PipelineXLocalState { +public: + ENABLE_FACTORY_CREATOR(MaterializationSourceLocalState); + using Base = PipelineXLocalState; + using Parent = MaterializationSourceOperatorX; + MaterializationSourceLocalState(RuntimeState* state, OperatorXBase* parent) + : Base(state, parent) {}; + + Status init(doris::RuntimeState* state, doris::pipeline::LocalStateInfo& info) override { + RETURN_IF_ERROR(Base::init(state, info)); + _max_rpc_timer = ADD_TIMER_WITH_LEVEL(_runtime_profile, "MaxRpcTime", 2); + _merge_response_timer = ADD_TIMER_WITH_LEVEL(_runtime_profile, "MergeResponseTime", 2); + return Status::OK(); + } + +private: + RuntimeProfile::Counter* _max_rpc_timer = nullptr; + RuntimeProfile::Counter* _merge_response_timer = nullptr; + + friend class MaterializationSourceOperatorX; + friend class OperatorX; +}; + +class MaterializationSourceOperatorX final : public OperatorX { +public: + using Base = OperatorX; + MaterializationSourceOperatorX(ObjectPool* pool, const TPlanNode& tnode, const int operator_id, + const DescriptorTbl& descs) + : Base(pool, tnode, operator_id, descs) {}; + 
~MaterializationSourceOperatorX() override = default; + + Status get_block(doris::RuntimeState* state, vectorized::Block* block, bool* eos) override; + + bool is_source() const override { return true; } +}; + +} // namespace pipeline +#include "common/compile_check_end.h" +} // namespace doris \ No newline at end of file diff --git a/be/src/pipeline/exec/operator.cpp b/be/src/pipeline/exec/operator.cpp index 0357bb7d358538..25ddc4b8b8f7d6 100644 --- a/be/src/pipeline/exec/operator.cpp +++ b/be/src/pipeline/exec/operator.cpp @@ -43,6 +43,8 @@ #include "pipeline/exec/jdbc_scan_operator.h" #include "pipeline/exec/jdbc_table_sink_operator.h" #include "pipeline/exec/local_merge_sort_source_operator.h" +#include "pipeline/exec/materialization_sink_operator.h" +#include "pipeline/exec/materialization_source_operator.h" #include "pipeline/exec/memory_scratch_sink_operator.h" #include "pipeline/exec/meta_scan_operator.h" #include "pipeline/exec/mock_operator.h" @@ -773,6 +775,7 @@ DECLARE_OPERATOR(PartitionedHashJoinSinkLocalState) DECLARE_OPERATOR(GroupCommitBlockSinkLocalState) DECLARE_OPERATOR(CacheSinkLocalState) DECLARE_OPERATOR(DictSinkLocalState) +DECLARE_OPERATOR(MaterializationSinkLocalState) #undef DECLARE_OPERATOR @@ -806,6 +809,7 @@ DECLARE_OPERATOR(MetaScanLocalState) DECLARE_OPERATOR(LocalExchangeSourceLocalState) DECLARE_OPERATOR(PartitionedHashJoinProbeLocalState) DECLARE_OPERATOR(CacheSourceLocalState) +DECLARE_OPERATOR(MaterializationSourceLocalState) #ifdef BE_TEST DECLARE_OPERATOR(MockLocalState) @@ -839,7 +843,7 @@ template class PipelineXSinkLocalState; template class PipelineXSinkLocalState; template class PipelineXSinkLocalState; template class PipelineXSinkLocalState; -template class PipelineXSinkLocalState; +template class PipelineXSinkLocalState; template class PipelineXLocalState; template class PipelineXLocalState; @@ -851,7 +855,7 @@ template class PipelineXLocalState; template class PipelineXLocalState; template class PipelineXLocalState; 
template class PipelineXLocalState; -template class PipelineXLocalState; +template class PipelineXLocalState; template class PipelineXLocalState; template class PipelineXLocalState; template class PipelineXLocalState; diff --git a/be/src/pipeline/exec/scan_operator.cpp b/be/src/pipeline/exec/scan_operator.cpp index b01e372d1fbb5a..7b2361e45180e9 100644 --- a/be/src/pipeline/exec/scan_operator.cpp +++ b/be/src/pipeline/exec/scan_operator.cpp @@ -95,6 +95,15 @@ Status ScanLocalState::open(RuntimeState* state) { } RETURN_IF_ERROR(PipelineXLocalState<>::open(state)); auto& p = _parent->cast(); + + // init id_file_map() for runtime state + std::vector slots = p._output_tuple_desc->slots(); + for (auto slot : slots) { + if (slot->col_name().starts_with(BeConsts::GLOBAL_ROWID_COL)) { + state->set_id_file_map(); + } + } + _common_expr_ctxs_push_down.resize(p._common_expr_ctxs_push_down.size()); for (size_t i = 0; i < _common_expr_ctxs_push_down.size(); i++) { RETURN_IF_ERROR( @@ -1015,6 +1024,7 @@ Status ScanLocalState::_start_scanners( // https://github.com/apache/doris/pull/44635 const int parallism_of_scan_operator = p.is_serial_operator() ? 
1 : p.query_parallel_instance_num(); + _scanner_ctx = vectorized::ScannerContext::create_shared( state(), this, p._output_tuple_desc, p.output_row_descriptor(), scanners, p.limit(), _scan_dependency, parallism_of_scan_operator); diff --git a/be/src/pipeline/pipeline_fragment_context.cpp b/be/src/pipeline/pipeline_fragment_context.cpp index d31d91660f9a98..f75503ef7a7f4b 100644 --- a/be/src/pipeline/pipeline_fragment_context.cpp +++ b/be/src/pipeline/pipeline_fragment_context.cpp @@ -65,6 +65,8 @@ #include "pipeline/exec/jdbc_scan_operator.h" #include "pipeline/exec/jdbc_table_sink_operator.h" #include "pipeline/exec/local_merge_sort_source_operator.h" +#include "pipeline/exec/materialization_sink_operator.h" +#include "pipeline/exec/materialization_source_operator.h" #include "pipeline/exec/memory_scratch_sink_operator.h" #include "pipeline/exec/meta_scan_operator.h" #include "pipeline/exec/multi_cast_data_stream_sink.h" @@ -1581,6 +1583,25 @@ Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNo RETURN_IF_ERROR(cur_pipe->sink()->init(tnode, _runtime_state.get())); break; } + case TPlanNodeType::MATERIALIZATION_NODE: { + op.reset(new MaterializationSourceOperatorX(pool, tnode, next_operator_id(), descs)); + RETURN_IF_ERROR(cur_pipe->add_operator( + op, request.__isset.parallel_instances ? 
request.parallel_instances : 0)); + + const auto downstream_pipeline_id = cur_pipe->id(); + if (_dag.find(downstream_pipeline_id) == _dag.end()) { + _dag.insert({downstream_pipeline_id, {}}); + } + auto new_pipe = add_pipeline(cur_pipe); + _dag[downstream_pipeline_id].push_back(new_pipe->id()); + + DataSinkOperatorPtr sink(new MaterializationSinkOperatorX( + op->operator_id(), next_sink_operator_id(), pool, tnode)); + RETURN_IF_ERROR(new_pipe->set_sink(sink)); + RETURN_IF_ERROR(new_pipe->sink()->init(tnode, _runtime_state.get())); + cur_pipe = new_pipe; + break; + } case TPlanNodeType::INTERSECT_NODE: { RETURN_IF_ERROR(_build_operators_for_set_operation_node( pool, tnode, descs, op, cur_pipe, parent_idx, child_idx, request)); diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h index ea5514f668c078..fee5030a27e656 100644 --- a/be/src/runtime/exec_env.h +++ b/be/src/runtime/exec_env.h @@ -116,6 +116,7 @@ class LookupConnectionCache; class RowCache; class DummyLRUCache; class CacheManager; +class IdManager; class ProcessProfile; class HeapProfiler; class WalManager; @@ -335,6 +336,7 @@ class ExecEnv { LookupConnectionCache* get_lookup_connection_cache() { return _lookup_connection_cache; } RowCache* get_row_cache() { return _row_cache; } CacheManager* get_cache_manager() { return _cache_manager; } + IdManager* get_id_manager() { return _id_manager; } ProcessProfile* get_process_profile() { return _process_profile; } HeapProfiler* get_heap_profiler() { return _heap_profiler; } segment_v2::InvertedIndexSearcherCache* get_inverted_index_searcher_cache() { @@ -483,6 +485,7 @@ class ExecEnv { LookupConnectionCache* _lookup_connection_cache = nullptr; RowCache* _row_cache = nullptr; CacheManager* _cache_manager = nullptr; + IdManager* _id_manager = nullptr; ProcessProfile* _process_profile = nullptr; HeapProfiler* _heap_profiler = nullptr; segment_v2::InvertedIndexSearcherCache* _inverted_index_searcher_cache = nullptr; diff --git 
a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index 4ab06630f1b5e8..02f873eccd81e4 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -47,6 +47,7 @@ #include "io/cache/fs_file_cache_storage.h" #include "io/fs/file_meta_cache.h" #include "io/fs/local_file_reader.h" +#include "olap/id_manager.h" #include "olap/memtable_memory_limiter.h" #include "olap/olap_define.h" #include "olap/options.h" @@ -452,6 +453,7 @@ Status ExecEnv::_init_mem_env() { return Status::InternalError(ss.str()); } + _id_manager = new IdManager(); _cache_manager = CacheManager::create_global_instance(); int64_t storage_cache_limit = @@ -719,6 +721,9 @@ void ExecEnv::destroy() { _load_stream_map_pool.reset(); SAFE_STOP(_write_cooldown_meta_executors); + // _id_manager must be destoried before tablet schema cache + SAFE_DELETE(_id_manager); + // StorageEngine must be destoried before _cache_manager destory SAFE_STOP(_storage_engine); _storage_engine.reset(); diff --git a/be/src/runtime/query_context.h b/be/src/runtime/query_context.h index 57a0e235339669..c1ff54001675cf 100644 --- a/be/src/runtime/query_context.h +++ b/be/src/runtime/query_context.h @@ -32,7 +32,6 @@ #include "common/config.h" #include "common/factory_creator.h" #include "common/object_pool.h" -#include "pipeline/dependency.h" #include "runtime/exec_env.h" #include "runtime/memory/mem_tracker_limiter.h" #include "runtime/runtime_predicate.h" @@ -48,6 +47,7 @@ namespace doris { namespace pipeline { class PipelineFragmentContext; class PipelineTask; +class Dependency; } // namespace pipeline struct ReportStatusRequest { @@ -233,6 +233,7 @@ class QueryContext : public std::enable_shared_from_this { TNetworkAddress coord_addr; TNetworkAddress current_connect_fe; TQueryGlobals query_globals; + const TQueryGlobals get_query_globals() const { return query_globals; } ObjectPool obj_pool; @@ -373,6 +374,8 @@ class QueryContext : public std::enable_shared_from_this { timespec 
get_query_arrival_timestamp() const { return this->_query_arrival_timestamp; } QuerySource get_query_source() const { return this->_query_source; } + + const TQueryOptions get_query_options() const { return _query_options; } }; } // namespace doris diff --git a/be/src/runtime/runtime_state.cpp b/be/src/runtime/runtime_state.cpp index 41ca4e40e59ac9..b9e96ad08924e5 100644 --- a/be/src/runtime/runtime_state.cpp +++ b/be/src/runtime/runtime_state.cpp @@ -36,6 +36,7 @@ #include "common/object_pool.h" #include "common/status.h" #include "io/fs/s3_file_system.h" +#include "olap/id_manager.h" #include "olap/storage_engine.h" #include "pipeline/exec/operator.h" #include "pipeline/pipeline_task.h" @@ -528,5 +529,8 @@ bool RuntimeState::low_memory_mode() const { return _query_ctx->low_memory_mode(); } +void RuntimeState::set_id_file_map() { + _id_file_map = _exec_env->get_id_manager()->add_id_file_map(_query_id, execution_timeout()); +} #include "common/compile_check_end.h" } // end namespace doris diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index e4ecf59563c170..a7096e72122c5a 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -69,6 +69,7 @@ class Dependency; class DescriptorTbl; class ObjectPool; class ExecEnv; +class IdFileMap; class RuntimeFilterMgr; class MemTrackerLimiter; class QueryContext; @@ -657,6 +658,10 @@ class RuntimeState { int profile_level() const { return _profile_level; } + std::shared_ptr& get_id_file_map() { return _id_file_map; } + + void set_id_file_map(); + private: Status create_error_log_file(); @@ -783,6 +788,9 @@ class RuntimeState { // error file path on s3, ${bucket}/${prefix}/error_log/${label}_${fragment_instance_id} std::string _s3_error_log_file_path; std::mutex _s3_error_log_file_lock; + + // used for encoding the global lazy materialize + std::shared_ptr _id_file_map = nullptr; }; #define RETURN_IF_CANCELLED(state) \ diff --git 
a/be/src/runtime/workload_group/workload_group.h b/be/src/runtime/workload_group/workload_group.h index da99923f146e6d..3d61b463810062 100644 --- a/be/src/runtime/workload_group/workload_group.h +++ b/be/src/runtime/workload_group/workload_group.h @@ -232,7 +232,7 @@ class WorkloadGroup : public std::enable_shared_from_this { friend class DummyWorkloadGroupTest; -protected: +private: void create_cgroup_cpu_ctl_no_lock(); void upsert_cgroup_cpu_ctl_no_lock(WorkloadGroupInfo* wg_info); void upsert_thread_pool_no_lock(WorkloadGroupInfo* wg_info, diff --git a/be/src/runtime_filter/runtime_filter.cpp b/be/src/runtime_filter/runtime_filter.cpp index 0b61c98f382a3a..c9f6baecda6c4b 100644 --- a/be/src/runtime_filter/runtime_filter.cpp +++ b/be/src/runtime_filter/runtime_filter.cpp @@ -18,6 +18,7 @@ #include "runtime_filter/runtime_filter.h" #include "common/status.h" +#include "runtime/runtime_state.h" #include "util/brpc_client_cache.h" #include "util/brpc_closure.h" #include "vec/exprs/vexpr.h" diff --git a/be/src/service/backend_options.h b/be/src/service/backend_options.h index 0052eb41530aee..543863a6309146 100644 --- a/be/src/service/backend_options.h +++ b/be/src/service/backend_options.h @@ -37,6 +37,7 @@ class BackendOptions { static std::string get_be_endpoint(); static TBackend get_local_backend(); static void set_backend_id(int64_t backend_id); + static int64_t get_backend_id() { return _s_backend_id; } static void set_localhost(const std::string& host); static bool is_bind_ipv6(); static const char* get_service_bind_address(); diff --git a/be/src/service/brpc.h b/be/src/service/brpc.h index c4657229d455a5..1e588c5957334a 100644 --- a/be/src/service/brpc.h +++ b/be/src/service/brpc.h @@ -17,9 +17,12 @@ #pragma once +#ifdef EINTERNAL +#undef EINTERNAL +#endif + // all header need by brpc is contain in this file. // include this file instead of include . 
- #include #include #include diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp index daff95d9615019..b4c5c1e9263d75 100644 --- a/be/src/service/internal_service.cpp +++ b/be/src/service/internal_service.cpp @@ -92,6 +92,8 @@ #include "runtime/stream_load/stream_load_context.h" #include "runtime/thread_context.h" #include "runtime/types.h" +#include "runtime/workload_group/workload_group.h" +#include "runtime/workload_group/workload_group_manager.h" #include "service/backend_options.h" #include "service/point_query_executor.h" #include "util/arrow/row_batch.h" @@ -2084,6 +2086,48 @@ void PInternalService::multiget_data(google::protobuf::RpcController* controller } } +void PInternalService::multiget_data_v2(google::protobuf::RpcController* controller, + const PMultiGetRequestV2* request, + PMultiGetResponseV2* response, + google::protobuf::Closure* done) { + auto wg = ExecEnv::GetInstance()->workload_group_mgr()->get_group(request->wg_id()); + Status st = Status::OK(); + + if (!wg) [[unlikely]] { + brpc::ClosureGuard closure_guard(done); + st = Status::Error("fail to find wg: wg id:" + + std::to_string(request->wg_id())); + st.to_protobuf(response->mutable_status()); + return; + } + + doris::pipeline::TaskScheduler* exec_sched = nullptr; + vectorized::SimplifiedScanScheduler* scan_sched = nullptr; + vectorized::SimplifiedScanScheduler* remote_scan_sched = nullptr; + wg->get_query_scheduler(&exec_sched, &scan_sched, &remote_scan_sched); + DCHECK(remote_scan_sched); + + st = remote_scan_sched->submit_scan_task(vectorized::SimplifiedScanTask( + [request, response, done]() { + signal::set_signal_task_id(request->query_id()); + // multi get data by rowid + MonotonicStopWatch watch; + watch.start(); + brpc::ClosureGuard closure_guard(done); + response->mutable_status()->set_status_code(0); + SCOPED_ATTACH_TASK(ExecEnv::GetInstance()->rowid_storage_reader_tracker()); + Status st = RowIdStorageReader::read_by_rowids(*request, response); + 
st.to_protobuf(response->mutable_status()); + LOG(INFO) << "multiget_data finished, cost(us):" << watch.elapsed_time() / 1000; + }, + nullptr)); + + if (!st.ok()) { + brpc::ClosureGuard closure_guard(done); + st.to_protobuf(response->mutable_status()); + } +} + void PInternalServiceImpl::get_tablet_rowset_versions(google::protobuf::RpcController* cntl_base, const PGetTabletVersionsRequest* request, PGetTabletVersionsResponse* response, diff --git a/be/src/service/internal_service.h b/be/src/service/internal_service.h index f5776df0d4795b..0737db149aeadb 100644 --- a/be/src/service/internal_service.h +++ b/be/src/service/internal_service.h @@ -190,6 +190,10 @@ class PInternalService : public PBackendService { void multiget_data(google::protobuf::RpcController* controller, const PMultiGetRequest* request, PMultiGetResponse* response, google::protobuf::Closure* done) override; + void multiget_data_v2(google::protobuf::RpcController* controller, + const PMultiGetRequestV2* request, PMultiGetResponseV2* response, + google::protobuf::Closure* done) override; + void tablet_fetch_data(google::protobuf::RpcController* controller, const PTabletKeyLookupRequest* request, PTabletKeyLookupResponse* response, diff --git a/be/src/util/brpc_closure.h b/be/src/util/brpc_closure.h index 29b93684671f20..c1be06c691d65a 100644 --- a/be/src/util/brpc_closure.h +++ b/be/src/util/brpc_closure.h @@ -25,7 +25,6 @@ #include "runtime/query_context.h" #include "runtime/thread_context.h" #include "service/brpc.h" -#include "util/brpc_closure.h" namespace doris { @@ -82,7 +81,7 @@ class AutoReleaseClosure : public google::protobuf::Closure { public: AutoReleaseClosure(std::shared_ptr req, std::shared_ptr callback, - std::weak_ptr context = {}) + std::weak_ptr context = {}, std::string_view error_msg = {}) : request_(req), callback_(callback), context_(std::move(context)) { this->cntl_ = callback->cntl_; this->response_ = callback->response_; @@ -113,10 +112,12 @@ class AutoReleaseClosure : 
public google::protobuf::Closure { // at any stage. std::shared_ptr request_; std::shared_ptr response_; + std::string error_msg_; protected: virtual void _process_if_rpc_failed() { - std::string error_msg = "RPC meet failed: " + cntl_->ErrorText(); + std::string error_msg = + fmt::format("RPC meet failed: {} {}", cntl_->ErrorText(), error_msg_); if (auto ctx = context_.lock(); ctx) { ctx->cancel(Status::NetworkError(error_msg)); } else { diff --git a/be/src/vec/common/string_ref.cpp b/be/src/vec/common/string_ref.cpp index 413c0338c1001e..e113694f34c3e5 100644 --- a/be/src/vec/common/string_ref.cpp +++ b/be/src/vec/common/string_ref.cpp @@ -69,11 +69,14 @@ bool StringRef::end_with(char ch) const { } bool StringRef::start_with(const StringRef& search_string) const { - DCHECK(size >= search_string.size); if (search_string.size == 0) { return true; } + if (UNLIKELY(size < search_string.size)) { + return false; + } + #if defined(__SSE2__) || defined(__aarch64__) return memequalSSE2Wide(data, search_string.data, search_string.size); #else diff --git a/be/src/vec/exec/format/generic_reader.h b/be/src/vec/exec/format/generic_reader.h index 4605f4b97b0c4b..8fe9ba8454db1f 100644 --- a/be/src/vec/exec/format/generic_reader.h +++ b/be/src/vec/exec/format/generic_reader.h @@ -70,12 +70,25 @@ class GenericReader : public ProfileCollector { virtual Status close() { return Status::OK(); } + Status set_read_lines_mode(const std::list& read_lines) { + _read_line_mode_mode = true; + _read_lines = read_lines; + return _set_read_one_line_impl(); + } + protected: + virtual Status _set_read_one_line_impl() { + return Status::NotSupported("set_read_lines_mode is not implemented for this reader."); + } + const size_t _MIN_BATCH_SIZE = 4064; // 4094 - 32(padding) /// Whether the underlying FileReader has filled the partition&missing columns bool _fill_all_columns = false; TPushAggOp::type _push_down_agg_type {}; + + bool _read_line_mode_mode = false; + std::list _read_lines; }; #include 
"common/compile_check_end.h" diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 4b0eeb1b4342a4..de8effc3b6b66f 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -44,6 +44,8 @@ #include "exprs/hybrid_set.h" #include "io/fs/buffered_reader.h" #include "io/fs/file_reader.h" +#include "olap/id_manager.h" +#include "olap/utils.h" #include "orc/Exceptions.hh" #include "orc/Int128.hh" #include "orc/MemoryPool.hh" @@ -1224,6 +1226,7 @@ Status OrcReader::set_fill_columns( } _row_reader = _reader->createRowReader(_row_reader_options, _orc_filter.get(), _string_dict_filter.get()); + _batch = _row_reader->createRowBatch(_batch_size); const auto& selected_type = _row_reader->getSelectedType(); int idx = 0; @@ -1349,6 +1352,20 @@ Status OrcReader::_fill_missing_columns( return Status::OK(); } +Status OrcReader::_fill_row_id_columns(Block* block) { + if (_row_id_column_iterator_pair.first != nullptr) { + RETURN_IF_ERROR( + _row_id_column_iterator_pair.first->seek_to_ordinal(_row_reader->getRowNumber())); + size_t fill_size = _batch->numElements; + + auto col = block->get_by_position(_row_id_column_iterator_pair.second) + .column->assume_mutable(); + RETURN_IF_ERROR(_row_id_column_iterator_pair.first->next_batch(&fill_size, col)); + } + + return Status::OK(); +} + void OrcReader::_init_bloom_filter( std::unordered_map* colname_to_value_range) { // generate bloom filter @@ -1953,6 +1970,11 @@ Status OrcReader::get_next_block_impl(Block* block, size_t* read_rows, bool* eof return Status::OK(); } + if (!_seek_to_read_one_line()) { + *eof = true; + return Status::OK(); + } + if (_lazy_read_ctx.can_lazy_read) { std::vector columns_to_filter; int column_to_keep = block->columns(); @@ -2006,6 +2028,8 @@ Status OrcReader::get_next_block_impl(Block* block, size_t* read_rows, bool* eof RETURN_IF_ERROR( _fill_missing_columns(block, _batch->numElements, 
_lazy_read_ctx.missing_columns)); + RETURN_IF_ERROR(_fill_row_id_columns(block)); + if (block->rows() == 0) { RETURN_IF_ERROR(_convert_dict_cols_to_string_cols(block, nullptr)); *eof = true; @@ -2093,6 +2117,8 @@ Status OrcReader::get_next_block_impl(Block* block, size_t* read_rows, bool* eof RETURN_IF_ERROR( _fill_missing_columns(block, _batch->numElements, _lazy_read_ctx.missing_columns)); + RETURN_IF_ERROR(_fill_row_id_columns(block)); + if (block->rows() == 0) { RETURN_IF_ERROR(_convert_dict_cols_to_string_cols(block, nullptr)); *eof = true; @@ -2144,7 +2170,7 @@ Status OrcReader::get_next_block_impl(Block* block, size_t* read_rows, bool* eof _execute_filter_position_delete_rowids(*_delete_rows_filter_ptr); RETURN_IF_CATCH_EXCEPTION(Block::filter_block_internal( block, columns_to_filter, (*_delete_rows_filter_ptr))); - } else { + } else if (_position_delete_ordered_rowids != nullptr) { std::unique_ptr filter(new IColumn::Filter(block->rows(), 1)); _execute_filter_position_delete_rowids(*filter); RETURN_IF_CATCH_EXCEPTION( diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index f76dc693c5aaaf..8efcea35036148 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -38,6 +38,7 @@ #include "io/fs/file_reader.h" #include "io/fs/file_reader_writer_fwd.h" #include "olap/olap_common.h" +#include "olap/rowset/segment_v2/column_reader.h" #include "orc/Reader.hh" #include "orc/Type.hh" #include "orc/Vector.hh" @@ -59,6 +60,9 @@ namespace doris { class RuntimeState; class TFileRangeDesc; class TFileScanRangeParams; +namespace segment_v2 { +class RowIdColumnIteratorV2; +} namespace io { class FileSystem; struct IOContext; @@ -209,6 +213,12 @@ class OrcReader : public GenericReader { static DataTypePtr convert_to_doris_type(const orc::Type* orc_type); static std::string get_field_name_lower_case(const orc::Type* orc_type, int pos); + void set_row_id_column_iterator( + const 
std::pair, int>& + iterator_pair) { + _row_id_column_iterator_pair = iterator_pair; + } + protected: void _collect_profile_before_close() override; @@ -576,6 +586,24 @@ class OrcReader : public GenericReader { return true; } + Status _fill_row_id_columns(Block* block); + + bool _seek_to_read_one_line() { + if (_read_line_mode_mode) { + if (_read_lines.empty()) { + return false; + } + _row_reader->seekToRow(_read_lines.front()); + _read_lines.pop_front(); + } + return true; + } + + Status _set_read_one_line_impl() override { + _batch_size = 1; + return Status::OK(); + } + private: // This is only for count(*) short circuit read. // save the total number of rows in range @@ -673,6 +701,9 @@ class OrcReader : public GenericReader { int64_t _orc_tiny_stripe_threshold_bytes = 8L * 1024L * 1024L; int64_t _orc_once_max_read_bytes = 8L * 1024L * 1024L; int64_t _orc_max_merge_distance_bytes = 1L * 1024L * 1024L; + + std::pair, int> + _row_id_column_iterator_pair = {nullptr, -1}; }; class StripeStreamInputStream : public orc::InputStream, public ProfileCollector { diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp index 7e11a97cc4b9ad..870dc374e3b0fb 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp @@ -295,11 +295,14 @@ Status RowGroupReader::next_batch(Block* block, size_t batch_size, size_t* read_ // Process external table query task that select columns are all from path. 
if (_read_columns.empty()) { - RETURN_IF_ERROR(_read_empty_batch(batch_size, read_rows, batch_eof)); + bool modify_row_ids = false; + RETURN_IF_ERROR(_read_empty_batch(batch_size, read_rows, batch_eof, &modify_row_ids)); RETURN_IF_ERROR( _fill_partition_columns(block, *read_rows, _lazy_read_ctx.partition_columns)); RETURN_IF_ERROR(_fill_missing_columns(block, *read_rows, _lazy_read_ctx.missing_columns)); + RETURN_IF_ERROR(_fill_row_id_columns(block, *read_rows, modify_row_ids)); + Status st = VExprContext::filter_block(_lazy_read_ctx.conjuncts, block, block->columns()); *read_rows = block->rows(); return st; @@ -314,6 +317,7 @@ Status RowGroupReader::next_batch(Block* block, size_t batch_size, size_t* read_ RETURN_IF_ERROR( _fill_partition_columns(block, *read_rows, _lazy_read_ctx.partition_columns)); RETURN_IF_ERROR(_fill_missing_columns(block, *read_rows, _lazy_read_ctx.missing_columns)); + RETURN_IF_ERROR(_fill_row_id_columns(block, *read_rows, false)); if (block->rows() == 0) { _convert_dict_cols_to_string_cols(block); @@ -368,6 +372,10 @@ Status RowGroupReader::next_batch(Block* block, size_t batch_size, size_t* read_ void RowGroupReader::_merge_read_ranges(std::vector& row_ranges) { _read_ranges = row_ranges; + _remaining_rows = 0; + for (auto& range : row_ranges) { + _remaining_rows += range.last_row - range.first_row; + } } Status RowGroupReader::_read_column_data(Block* block, const std::vector& columns, @@ -456,6 +464,7 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re _lazy_read_ctx.predicate_partition_columns)); RETURN_IF_ERROR(_fill_missing_columns(block, pre_read_rows, _lazy_read_ctx.predicate_missing_columns)); + RETURN_IF_ERROR(_fill_row_id_columns(block, pre_read_rows, false)); RETURN_IF_ERROR(_build_pos_delete_filter(pre_read_rows)); @@ -508,6 +517,11 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re for (auto& col : _lazy_read_ctx.predicate_missing_columns) { 
block->get_by_name(col.first).column->assume_mutable()->clear(); } + if (_row_id_column_iterator_pair.first != nullptr) { + block->get_by_position(_row_id_column_iterator_pair.second) + .column->assume_mutable() + ->clear(); + } Block::erase_useless_column(block, origin_column_num); } @@ -700,17 +714,21 @@ Status RowGroupReader::_fill_missing_columns( return Status::OK(); } -Status RowGroupReader::_read_empty_batch(size_t batch_size, size_t* read_rows, bool* batch_eof) { +Status RowGroupReader::_read_empty_batch(size_t batch_size, size_t* read_rows, bool* batch_eof, + bool* modify_row_ids) { + *modify_row_ids = false; if (_position_delete_ctx.has_filter) { int64_t start_row_id = _position_delete_ctx.current_row_id; int64_t end_row_id = std::min(_position_delete_ctx.current_row_id + (int64_t)batch_size, _position_delete_ctx.last_row_id); int64_t num_delete_rows = 0; + auto before_index = _position_delete_ctx.index; while (_position_delete_ctx.index < _position_delete_ctx.end_index) { const int64_t& delete_row_id = _position_delete_ctx.delete_rows[_position_delete_ctx.index]; if (delete_row_id < start_row_id) { _position_delete_ctx.index++; + before_index = _position_delete_ctx.index; } else if (delete_row_id < end_row_id) { num_delete_rows++; _position_delete_ctx.index++; @@ -721,6 +739,21 @@ Status RowGroupReader::_read_empty_batch(size_t batch_size, size_t* read_rows, b *read_rows = end_row_id - start_row_id - num_delete_rows; _position_delete_ctx.current_row_id = end_row_id; *batch_eof = _position_delete_ctx.current_row_id == _position_delete_ctx.last_row_id; + + if (_row_id_column_iterator_pair.first != nullptr) { + *modify_row_ids = true; + _current_batch_row_ids.clear(); + _current_batch_row_ids.resize(*read_rows); + size_t idx = 0; + for (auto id = start_row_id; id < end_row_id; id++) { + if (before_index < _position_delete_ctx.index && + id == _position_delete_ctx.delete_rows[before_index]) { + before_index++; + continue; + } + _current_batch_row_ids[idx++] 
= (rowid_t)id; + } + } } else { if (batch_size < _remaining_rows) { *read_rows = batch_size; @@ -732,6 +765,47 @@ Status RowGroupReader::_read_empty_batch(size_t batch_size, size_t* read_rows, b *batch_eof = true; } } + _total_read_rows += *read_rows; + return Status::OK(); +} + +Status RowGroupReader::_get_current_batch_row_id(size_t read_rows) { + _current_batch_row_ids.clear(); + _current_batch_row_ids.resize(read_rows); + + int64_t idx = 0; + int64_t read_range_rows = 0; + for (auto& range : _read_ranges) { + if (read_rows == 0) { + break; + } + if (read_range_rows + (range.last_row - range.first_row) > _total_read_rows) { + auto fi = std::max(_total_read_rows - read_range_rows, 0L) + range.first_row; + auto len = std::min(read_rows, (size_t)std::max(range.last_row - fi, 0L)); + read_rows -= len; + + for (auto i = 0; i < len; i++) { + _current_batch_row_ids[idx++] = + (rowid_t)(fi + i + _current_row_group_idx.first_row); + } + } + read_range_rows += range.last_row - range.first_row; + } + return Status::OK(); +} + +Status RowGroupReader::_fill_row_id_columns(Block* block, size_t read_rows, + bool is_current_row_ids) { + if (_row_id_column_iterator_pair.first != nullptr) { + if (!is_current_row_ids) { + RETURN_IF_ERROR(_get_current_batch_row_id(read_rows)); + } + auto col = block->get_by_position(_row_id_column_iterator_pair.second) + .column->assume_mutable(); + RETURN_IF_ERROR(_row_id_column_iterator_pair.first->read_by_rowids( + _current_batch_row_ids.data(), _current_batch_row_ids.size(), col)); + } + return Status::OK(); } diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.h b/be/src/vec/exec/format/parquet/vparquet_group_reader.h index 8f384b3e22a73c..35bf6970d6f631 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.h @@ -28,6 +28,8 @@ #include #include "io/fs/file_reader_writer_fwd.h" +#include "olap/id_manager.h" +#include "olap/utils.h" #include 
"vec/columns/column.h" #include "vec/exec/format/parquet/parquet_common.h" #include "vec/exprs/vexpr_fwd.h" @@ -164,6 +166,15 @@ class RowGroupReader : public ProfileCollector { void set_remaining_rows(int64_t rows) { _remaining_rows = rows; } int64_t get_remaining_rows() { return _remaining_rows; } + void set_row_id_column_iterator( + const std::pair, int>& iterator_pair) { + _row_id_column_iterator_pair = iterator_pair; + } + + void set_current_row_group_idx(RowGroupIndex row_group_idx) { + _current_row_group_idx = row_group_idx; + } + protected: void _collect_profile_before_close() override { if (_file_reader != nullptr) { @@ -173,7 +184,8 @@ class RowGroupReader : public ProfileCollector { private: void _merge_read_ranges(std::vector& row_ranges); - Status _read_empty_batch(size_t batch_size, size_t* read_rows, bool* batch_eof); + Status _read_empty_batch(size_t batch_size, size_t* read_rows, bool* batch_eof, + bool* modify_row_ids); Status _read_column_data(Block* block, const std::vector& columns, size_t batch_size, size_t* read_rows, bool* batch_eof, FilterMap& filter_map); @@ -199,6 +211,9 @@ class RowGroupReader : public ProfileCollector { Status _rewrite_dict_conjuncts(std::vector& dict_codes, int slot_id, bool is_nullable); void _convert_dict_cols_to_string_cols(Block* block); + Status _get_current_batch_row_id(size_t read_rows); + Status _fill_row_id_columns(Block* block, size_t read_rows, bool is_current_row_ids); + io::FileReaderSPtr _file_reader; std::unordered_map> _column_readers; const std::vector& _read_columns; @@ -230,6 +245,11 @@ class RowGroupReader : public ProfileCollector { RuntimeState* _state = nullptr; std::shared_ptr _obj_pool; bool _is_row_group_filtered = false; + + RowGroupIndex _current_row_group_idx {0, 0, 0}; + std::pair, int> _row_id_column_iterator_pair = {nullptr, + -1}; + std::vector _current_batch_row_ids; }; #include "common/compile_check_end.h" diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_reader.cpp index eb120a77def9ed..4c37e2e55c3cf5 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp @@ -446,6 +446,9 @@ Status ParquetReader::set_fill_columns( } } } + if (_row_id_column_iterator_pair.first != nullptr) { + _lazy_read_ctx.all_predicate_col_ids.emplace_back(_row_id_column_iterator_pair.second); + } for (auto& kv : partition_columns) { auto iter = predicate_columns.find(kv.first); @@ -636,7 +639,8 @@ Status ParquetReader::_next_row_group_reader() { // process page index and generate the ranges to read auto& row_group = _t_metadata->row_groups[row_group_index.row_group_id]; std::vector candidate_row_ranges; - RETURN_IF_ERROR(_process_page_index(row_group, candidate_row_ranges)); + + RETURN_IF_ERROR(_process_page_index(row_group, row_group_index, candidate_row_ranges)); RowGroupReader::PositionDeleteContext position_delete_ctx = _get_position_delete_ctx(row_group, row_group_index); @@ -659,6 +663,8 @@ Status ParquetReader::_next_row_group_reader() { group_file_reader, _read_columns, row_group_index.row_group_id, row_group, _ctz, _io_ctx, position_delete_ctx, _lazy_read_ctx, _state)); _row_group_eof = false; + _current_group_reader->set_current_row_group_idx(row_group_index); + _current_group_reader->set_row_id_column_iterator(_row_id_column_iterator_pair); return _current_group_reader->init(_file_metadata->schema(), candidate_row_ranges, _col_offsets, _tuple_descriptor, _row_descriptor, _colname_to_slot_id, _not_single_slot_filter_conjuncts, @@ -671,6 +677,7 @@ Status ParquetReader::_init_row_groups(const bool& is_filter_groups) { return Status::EndOfFile("No row group to read"); } int64_t row_index = 0; + _read_line_mode_row_ranges.resize(_total_groups); for (int32_t row_group_idx = 0; row_group_idx < _total_groups; row_group_idx++) { const tparquet::RowGroup& row_group = _t_metadata->row_groups[row_group_idx]; if (is_filter_groups && 
_is_misaligned_range_group(row_group)) { @@ -679,8 +686,11 @@ Status ParquetReader::_init_row_groups(const bool& is_filter_groups) { } bool filter_group = false; if (is_filter_groups) { - RETURN_IF_ERROR(_process_row_group_filter(row_group, &filter_group)); + RowGroupReader::RowGroupIndex row_group_index {row_group_idx, row_index, + row_index + row_group.num_rows}; + RETURN_IF_ERROR(_process_row_group_filter(row_group_index, row_group, &filter_group)); } + int64_t group_size = 0; // only calculate the needed columns std::function column_compressed_size = [&row_group, &column_compressed_size](const FieldSchema* field) -> int64_t { @@ -784,10 +794,17 @@ bool ParquetReader::_has_page_index(const std::vector& co } Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group, + const RowGroupReader::RowGroupIndex& row_group_index, std::vector& candidate_row_ranges) { if (UNLIKELY(_io_ctx && _io_ctx->should_stop)) { return Status::EndOfFile("stop"); } + + if (_read_line_mode_mode) { + candidate_row_ranges = _read_line_mode_row_ranges[row_group_index.row_group_id]; + return Status::OK(); + } + SCOPED_RAW_TIMER(&_statistics.page_index_filter_time); std::function read_whole_row_group = [&]() { @@ -901,13 +918,34 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group, return Status::OK(); } -Status ParquetReader::_process_row_group_filter(const tparquet::RowGroup& row_group, - bool* filter_group) { - RETURN_IF_ERROR(_process_column_stat_filter(row_group.columns, filter_group)); - _init_chunk_dicts(); - RETURN_IF_ERROR(_process_dict_filter(filter_group)); - _init_bloom_filter(); - RETURN_IF_ERROR(_process_bloom_filter(filter_group)); +Status ParquetReader::_process_row_group_filter( + const RowGroupReader::RowGroupIndex& row_group_index, const tparquet::RowGroup& row_group, + bool* filter_group) { + if (_read_line_mode_mode) { + auto group_start = row_group_index.first_row; + auto group_end = row_group_index.last_row; + + while 
(!_read_lines.empty()) { + auto v = _read_lines.front(); + if (v >= group_start && v < group_end) { + _read_line_mode_row_ranges[row_group_index.row_group_id].emplace_back( + RowRange {v - group_start, v - group_start + 1}); + _read_lines.pop_front(); + } else { + break; + } + } + + if (_read_line_mode_row_ranges[row_group_index.row_group_id].empty()) { + *filter_group = true; + } + } else { + RETURN_IF_ERROR(_process_column_stat_filter(row_group.columns, filter_group)); + _init_chunk_dicts(); + RETURN_IF_ERROR(_process_dict_filter(filter_group)); + _init_bloom_filter(); + RETURN_IF_ERROR(_process_bloom_filter(filter_group)); + } return Status::OK(); } diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.h b/be/src/vec/exec/format/parquet/vparquet_reader.h index f1f3df5332f0d0..d8aadcef7157ad 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_reader.h @@ -153,6 +153,11 @@ class ParquetReader : public GenericReader { _table_col_to_file_col = map; } + void set_row_id_column_iterator( + std::pair, int> iterator_pair) { + _row_id_column_iterator_pair = iterator_pair; + } + protected: void _collect_profile_before_close() override; @@ -206,13 +211,15 @@ class ParquetReader : public GenericReader { // Page Index Filter bool _has_page_index(const std::vector& columns, PageIndex& page_index); Status _process_page_index(const tparquet::RowGroup& row_group, + const RowGroupReader::RowGroupIndex& row_group_index, std::vector& candidate_row_ranges); // Row Group Filter bool _is_misaligned_range_group(const tparquet::RowGroup& row_group); Status _process_column_stat_filter(const std::vector& column_meta, bool* filter_group); - Status _process_row_group_filter(const tparquet::RowGroup& row_group, bool* filter_group); + Status _process_row_group_filter(const RowGroupReader::RowGroupIndex& row_group_index, + const tparquet::RowGroup& row_group, bool* filter_group); void _init_chunk_dicts(); Status 
_process_dict_filter(bool* filter_group); void _init_bloom_filter(); @@ -225,6 +232,8 @@ class ParquetReader : public GenericReader { static SortOrder _determine_sort_order(const tparquet::SchemaElement& parquet_schema); + Status _set_read_one_line_impl() override { return Status::OK(); } + private: RuntimeProfile* _profile = nullptr; const TFileScanRangeParams& _scan_params; @@ -294,6 +303,10 @@ class ParquetReader : public GenericReader { const std::unordered_map* _slot_id_to_filter_conjuncts = nullptr; bool _hive_use_column_names = false; std::unordered_map _ignored_stats; + + std::vector> _read_line_mode_row_ranges; + std::pair, int> _row_id_column_iterator_pair = {nullptr, + -1}; }; #include "common/compile_check_end.h" diff --git a/be/src/vec/exec/scan/file_scanner.cpp b/be/src/vec/exec/scan/file_scanner.cpp index c03f7354d95c56..0b13a62b88639f 100644 --- a/be/src/vec/exec/scan/file_scanner.cpp +++ b/be/src/vec/exec/scan/file_scanner.cpp @@ -37,6 +37,7 @@ #include "common/config.h" #include "common/logging.h" #include "common/status.h" +#include "exec/rowid_fetcher.h" #include "io/cache/block_file_cache_profile.h" #include "runtime/descriptors.h" #include "runtime/runtime_state.h" @@ -152,9 +153,9 @@ Status FileScanner::prepare(RuntimeState* state, const VExprContextSPtrs& conjun "RuntimeFilterPartitionPrunedRangeNum", TUnit::UNIT, 1); _file_cache_statistics.reset(new io::FileCacheStatistics()); - _io_ctx.reset(new io::IOContext()); + + RETURN_IF_ERROR(_init_io_ctx()); _io_ctx->file_cache_stats = _file_cache_statistics.get(); - _io_ctx->query_id = &_state->query_id(); if (_is_load) { _src_row_desc.reset(new RowDescriptor(_state->desc_tbl(), @@ -347,8 +348,9 @@ void FileScanner::_get_slot_ids(VExpr* expr, std::vector* slot_ids) { if (child_expr->is_slot_ref()) { VSlotRef* slot_ref = reinterpret_cast(child_expr.get()); slot_ids->emplace_back(slot_ref->slot_id()); + } else { + _get_slot_ids(child_expr.get(), slot_ids); } - _get_slot_ids(child_expr.get(), 
slot_ids); } } @@ -373,6 +375,7 @@ Status FileScanner::open(RuntimeState* state) { Status FileScanner::_get_block_impl(RuntimeState* state, Block* block, bool* eof) { Status st = _get_block_wrapped(state, block, eof); + if (!st.ok()) { // add cur path in error msg for easy debugging return std::move(st.append(". cur path: " + get_current_scan_range_name())); @@ -860,6 +863,18 @@ void FileScanner::_truncate_char_or_varchar_column(Block* block, int idx, int le Block::erase_useless_column(block, num_columns_without_result); } +Status FileScanner::_create_row_id_column_iterator(const int column_id) { + auto& id_file_map = _state->get_id_file_map(); + auto file_id = id_file_map->get_file_mapping_id(std::make_shared( + ((pipeline::FileScanLocalState*)_local_state)->parent_id(), _current_range, + _should_enable_file_meta_cache())); + _row_id_column_iterator_pair = std::make_pair( + std::make_shared(IdManager::ID_VERSION, + BackendOptions::get_backend_id(), file_id), + column_id); + return Status::OK(); +} + Status FileScanner::_get_next_reader() { while (true) { if (_cur_reader) { @@ -906,10 +921,8 @@ Status FileScanner::_get_next_reader() { } // create reader for specific format - Status init_status; - // for compatibility, if format_type is not set in range, use the format type of params - TFileFormatType::type format_type = - range.__isset.format_type ? range.format_type : _params->format_type; + Status init_status = Status::OK(); + TFileFormatType::type format_type = _get_current_format_type(); // JNI reader can only push down column value range bool push_down_predicates = !_is_load && _params->format_type != TFileFormatType::FORMAT_JNI; @@ -928,6 +941,7 @@ Status FileScanner::_get_next_reader() { } } } + bool need_to_get_parsed_schema = false; switch (format_type) { case TFileFormatType::FORMAT_JNI: { @@ -979,6 +993,8 @@ Status FileScanner::_get_next_reader() { _should_enable_file_meta_cache() ? 
ExecEnv::GetInstance()->file_meta_cache() : nullptr, _state->query_options().enable_parquet_lazy_mat); + parquet_reader->set_row_id_column_iterator(_row_id_column_iterator_pair); + // ATTN: the push down agg type may be set back to NONE, // see IcebergTableReader::init_row_filters for example. parquet_reader->set_push_down_agg_type(_get_push_down_agg_type()); @@ -989,59 +1005,7 @@ Status FileScanner::_get_next_reader() { if (push_down_predicates) { RETURN_IF_ERROR(_process_late_arrival_conjuncts()); } - if (range.__isset.table_format_params && - range.table_format_params.table_format_type == "iceberg") { - std::unique_ptr iceberg_reader = - IcebergParquetReader::create_unique(std::move(parquet_reader), _profile, - _state, *_params, range, _kv_cache, - _io_ctx.get()); - init_status = iceberg_reader->init_reader( - _file_col_names, _col_id_name_map, _colname_to_value_range, - _push_down_conjuncts, _real_tuple_desc, _default_val_row_desc.get(), - _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, - &_slot_id_to_filter_conjuncts); - _cur_reader = std::move(iceberg_reader); - } else if (range.__isset.table_format_params && - range.table_format_params.table_format_type == "paimon") { - std::unique_ptr paimon_reader = - PaimonParquetReader::create_unique(std::move(parquet_reader), _profile, - _state, *_params, range, _io_ctx.get()); - init_status = paimon_reader->init_reader( - _file_col_names, _col_id_name_map, _colname_to_value_range, - _push_down_conjuncts, _real_tuple_desc, _default_val_row_desc.get(), - _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, - &_slot_id_to_filter_conjuncts); - RETURN_IF_ERROR(paimon_reader->init_row_filters()); - _cur_reader = std::move(paimon_reader); - } else if (range.__isset.table_format_params && - range.table_format_params.table_format_type == "hudi") { - std::unique_ptr hudi_reader = - HudiParquetReader::create_unique(std::move(parquet_reader), _profile, - _state, *_params, range, _io_ctx.get()); - init_status = 
hudi_reader->init_reader( - _file_col_names, _col_id_name_map, _colname_to_value_range, - _push_down_conjuncts, _real_tuple_desc, _default_val_row_desc.get(), - _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, - &_slot_id_to_filter_conjuncts); - _cur_reader = std::move(hudi_reader); - } else { - bool hive_parquet_use_column_names = true; - - if (range.__isset.table_format_params && - range.table_format_params.table_format_type == "hive" && _state != nullptr) - [[likely]] { - hive_parquet_use_column_names = - _state->query_options().hive_parquet_use_column_names; - } - - std::vector place_holder; - init_status = parquet_reader->init_reader( - _file_col_names, place_holder, _colname_to_value_range, - _push_down_conjuncts, _real_tuple_desc, _default_val_row_desc.get(), - _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, - &_slot_id_to_filter_conjuncts, true, hive_parquet_use_column_names); - _cur_reader = std::move(parquet_reader); - } + RETURN_IF_ERROR(_init_parquet_reader(std::move(parquet_reader))); need_to_get_parsed_schema = true; break; } @@ -1049,70 +1013,12 @@ Status FileScanner::_get_next_reader() { std::unique_ptr orc_reader = OrcReader::create_unique( _profile, _state, *_params, range, _state->query_options().batch_size, _state->timezone(), _io_ctx.get(), _state->query_options().enable_orc_lazy_mat); + orc_reader->set_row_id_column_iterator(_row_id_column_iterator_pair); orc_reader->set_push_down_agg_type(_get_push_down_agg_type()); if (push_down_predicates) { RETURN_IF_ERROR(_process_late_arrival_conjuncts()); } - if (range.__isset.table_format_params && - range.table_format_params.table_format_type == "transactional_hive") { - std::unique_ptr tran_orc_reader = - TransactionalHiveReader::create_unique(std::move(orc_reader), _profile, - _state, *_params, range, - _io_ctx.get()); - init_status = tran_orc_reader->init_reader( - _file_col_names, _colname_to_value_range, _push_down_conjuncts, - _real_tuple_desc, _default_val_row_desc.get(), - 
&_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts); - RETURN_IF_ERROR(tran_orc_reader->init_row_filters()); - _cur_reader = std::move(tran_orc_reader); - } else if (range.__isset.table_format_params && - range.table_format_params.table_format_type == "iceberg") { - std::unique_ptr iceberg_reader = - IcebergOrcReader::create_unique(std::move(orc_reader), _profile, _state, - *_params, range, _kv_cache, _io_ctx.get()); - - init_status = iceberg_reader->init_reader( - _file_col_names, _col_id_name_map, _colname_to_value_range, - _push_down_conjuncts, _real_tuple_desc, _default_val_row_desc.get(), - _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, - &_slot_id_to_filter_conjuncts); - _cur_reader = std::move(iceberg_reader); - } else if (range.__isset.table_format_params && - range.table_format_params.table_format_type == "paimon") { - std::unique_ptr paimon_reader = PaimonOrcReader::create_unique( - std::move(orc_reader), _profile, _state, *_params, range, _io_ctx.get()); - - init_status = paimon_reader->init_reader( - _file_col_names, _col_id_name_map, _colname_to_value_range, - _push_down_conjuncts, _real_tuple_desc, _default_val_row_desc.get(), - &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts); - RETURN_IF_ERROR(paimon_reader->init_row_filters()); - _cur_reader = std::move(paimon_reader); - } else if (range.__isset.table_format_params && - range.table_format_params.table_format_type == "hudi") { - std::unique_ptr hudi_reader = HudiOrcReader::create_unique( - std::move(orc_reader), _profile, _state, *_params, range, _io_ctx.get()); - - init_status = hudi_reader->init_reader( - _file_col_names, _col_id_name_map, _colname_to_value_range, - _push_down_conjuncts, _real_tuple_desc, _default_val_row_desc.get(), - &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts); - _cur_reader = std::move(hudi_reader); - } else { - bool hive_orc_use_column_names = true; - - if (range.__isset.table_format_params && - 
range.table_format_params.table_format_type == "hive" && _state != nullptr) - [[likely]] { - hive_orc_use_column_names = _state->query_options().hive_orc_use_column_names; - } - init_status = orc_reader->init_reader( - &_file_col_names, {}, _colname_to_value_range, _push_down_conjuncts, false, - _real_tuple_desc, _default_val_row_desc.get(), - &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts, - hive_orc_use_column_names); - _cur_reader = std::move(orc_reader); - } + RETURN_IF_ERROR(_init_orc_reader(std::move(orc_reader))); need_to_get_parsed_schema = true; break; } @@ -1182,41 +1088,248 @@ Status FileScanner::_get_next_reader() { return Status::InternalError("failed to init reader, err: {}", init_status.to_string()); } - _name_to_col_type.clear(); - _missing_cols.clear(); - RETURN_IF_ERROR(_cur_reader->get_columns(&_name_to_col_type, &_missing_cols)); _cur_reader->set_push_down_agg_type(_get_push_down_agg_type()); - RETURN_IF_ERROR(_generate_missing_columns()); - RETURN_IF_ERROR(_cur_reader->set_fill_columns(_partition_col_descs, _missing_col_descs)); - if (VLOG_NOTICE_IS_ON && !_missing_cols.empty() && _is_load) { - fmt::memory_buffer col_buf; - for (auto& col : _missing_cols) { - fmt::format_to(col_buf, " {}", col); - } - VLOG_NOTICE << fmt::format("Unknown columns:{} in file {}", fmt::to_string(col_buf), - range.path); + RETURN_IF_ERROR(_set_fill_or_truncate_columns(need_to_get_parsed_schema)); + _cur_reader_eof = false; + break; + } + return Status::OK(); +} + +Status FileScanner::_init_parquet_reader(std::unique_ptr&& parquet_reader) { + const TFileRangeDesc& range = _current_range; + Status init_status = Status::OK(); + + if (range.__isset.table_format_params && + range.table_format_params.table_format_type == "iceberg") { + std::unique_ptr iceberg_reader = + IcebergParquetReader::create_unique(std::move(parquet_reader), _profile, _state, + *_params, range, _kv_cache, _io_ctx.get()); + init_status = iceberg_reader->init_reader( + 
_file_col_names, _col_id_name_map, _colname_to_value_range, _push_down_conjuncts, + _real_tuple_desc, _default_val_row_desc.get(), _col_name_to_slot_id, + &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts); + _cur_reader = std::move(iceberg_reader); + } else if (range.__isset.table_format_params && + range.table_format_params.table_format_type == "paimon") { + std::unique_ptr paimon_reader = PaimonParquetReader::create_unique( + std::move(parquet_reader), _profile, _state, *_params, range, _io_ctx.get()); + init_status = paimon_reader->init_reader( + _file_col_names, _col_id_name_map, _colname_to_value_range, _push_down_conjuncts, + _real_tuple_desc, _default_val_row_desc.get(), _col_name_to_slot_id, + &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts); + RETURN_IF_ERROR(paimon_reader->init_row_filters()); + _cur_reader = std::move(paimon_reader); + } else if (range.__isset.table_format_params && + range.table_format_params.table_format_type == "hudi") { + std::unique_ptr hudi_reader = HudiParquetReader::create_unique( + std::move(parquet_reader), _profile, _state, *_params, range, _io_ctx.get()); + init_status = hudi_reader->init_reader( + _file_col_names, _col_id_name_map, _colname_to_value_range, _push_down_conjuncts, + _real_tuple_desc, _default_val_row_desc.get(), _col_name_to_slot_id, + &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts); + _cur_reader = std::move(hudi_reader); + } else { + bool hive_parquet_use_column_names = true; + + if (range.__isset.table_format_params && + range.table_format_params.table_format_type == "hive" && _state != nullptr) [[likely]] { + hive_parquet_use_column_names = _state->query_options().hive_parquet_use_column_names; } - _source_file_col_names.clear(); - _source_file_col_types.clear(); - _source_file_col_name_types.clear(); - if (_state->query_options().truncate_char_or_varchar_columns && need_to_get_parsed_schema) { - Status status = 
_cur_reader->get_parsed_schema(&_source_file_col_names, - &_source_file_col_types); - if (!status.ok() && status.code() != TStatusCode::NOT_IMPLEMENTED_ERROR) { - return status; - } - DCHECK(_source_file_col_names.size() == _source_file_col_types.size()); - for (int i = 0; i < _source_file_col_names.size(); ++i) { - _source_file_col_name_types[_source_file_col_names[i]] = _source_file_col_types[i]; - } + std::vector place_holder; + init_status = parquet_reader->init_reader( + _file_col_names, place_holder, _colname_to_value_range, _push_down_conjuncts, + _real_tuple_desc, _default_val_row_desc.get(), _col_name_to_slot_id, + &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts, true, + hive_parquet_use_column_names); + _cur_reader = std::move(parquet_reader); + } + + return init_status; +} + +Status FileScanner::_init_orc_reader(std::unique_ptr&& orc_reader) { + const TFileRangeDesc& range = _current_range; + Status init_status = Status::OK(); + + if (range.__isset.table_format_params && + range.table_format_params.table_format_type == "transactional_hive") { + std::unique_ptr tran_orc_reader = + TransactionalHiveReader::create_unique(std::move(orc_reader), _profile, _state, + *_params, range, _io_ctx.get()); + init_status = tran_orc_reader->init_reader( + _file_col_names, _colname_to_value_range, _push_down_conjuncts, _real_tuple_desc, + _default_val_row_desc.get(), &_not_single_slot_filter_conjuncts, + &_slot_id_to_filter_conjuncts); + RETURN_IF_ERROR(tran_orc_reader->init_row_filters()); + _cur_reader = std::move(tran_orc_reader); + } else if (range.__isset.table_format_params && + range.table_format_params.table_format_type == "iceberg") { + std::unique_ptr iceberg_reader = IcebergOrcReader::create_unique( + std::move(orc_reader), _profile, _state, *_params, range, _kv_cache, _io_ctx.get()); + + init_status = iceberg_reader->init_reader( + _file_col_names, _col_id_name_map, _colname_to_value_range, _push_down_conjuncts, + _real_tuple_desc, 
_default_val_row_desc.get(), _col_name_to_slot_id, + &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts); + _cur_reader = std::move(iceberg_reader); + } else if (range.__isset.table_format_params && + range.table_format_params.table_format_type == "paimon") { + std::unique_ptr paimon_reader = PaimonOrcReader::create_unique( + std::move(orc_reader), _profile, _state, *_params, range, _io_ctx.get()); + + init_status = paimon_reader->init_reader( + _file_col_names, _col_id_name_map, _colname_to_value_range, _push_down_conjuncts, + _real_tuple_desc, _default_val_row_desc.get(), &_not_single_slot_filter_conjuncts, + &_slot_id_to_filter_conjuncts); + RETURN_IF_ERROR(paimon_reader->init_row_filters()); + _cur_reader = std::move(paimon_reader); + } else if (range.__isset.table_format_params && + range.table_format_params.table_format_type == "hudi") { + std::unique_ptr hudi_reader = HudiOrcReader::create_unique( + std::move(orc_reader), _profile, _state, *_params, range, _io_ctx.get()); + + init_status = hudi_reader->init_reader( + _file_col_names, _col_id_name_map, _colname_to_value_range, _push_down_conjuncts, + _real_tuple_desc, _default_val_row_desc.get(), &_not_single_slot_filter_conjuncts, + &_slot_id_to_filter_conjuncts); + _cur_reader = std::move(hudi_reader); + } else { + bool hive_orc_use_column_names = true; + + if (range.__isset.table_format_params && + range.table_format_params.table_format_type == "hive" && _state != nullptr) [[likely]] { + hive_orc_use_column_names = _state->query_options().hive_orc_use_column_names; + } + init_status = orc_reader->init_reader( + &_file_col_names, {}, _colname_to_value_range, _push_down_conjuncts, false, + _real_tuple_desc, _default_val_row_desc.get(), &_not_single_slot_filter_conjuncts, + &_slot_id_to_filter_conjuncts, hive_orc_use_column_names); + _cur_reader = std::move(orc_reader); + } + + return init_status; +} + +Status FileScanner::_set_fill_or_truncate_columns(bool need_to_get_parsed_schema) { + 
_name_to_col_type.clear(); + _missing_cols.clear(); + RETURN_IF_ERROR(_cur_reader->get_columns(&_name_to_col_type, &_missing_cols)); + RETURN_IF_ERROR(_generate_missing_columns()); + RETURN_IF_ERROR(_cur_reader->set_fill_columns(_partition_col_descs, _missing_col_descs)); + if (VLOG_NOTICE_IS_ON && !_missing_cols.empty() && _is_load) { + fmt::memory_buffer col_buf; + for (auto& col : _missing_cols) { + fmt::format_to(col_buf, " {}", col); + } + VLOG_NOTICE << fmt::format("Unknown columns:{} in file {}", fmt::to_string(col_buf), + _current_range.path); + } + + RETURN_IF_ERROR(_generate_truncate_columns(need_to_get_parsed_schema)); + return Status::OK(); +} + +Status FileScanner::_generate_truncate_columns(bool need_to_get_parsed_schema) { + _source_file_col_names.clear(); + _source_file_col_types.clear(); + _source_file_col_name_types.clear(); + if (_state->query_options().truncate_char_or_varchar_columns && need_to_get_parsed_schema) { + Status status = + _cur_reader->get_parsed_schema(&_source_file_col_names, &_source_file_col_types); + if (!status.ok() && status.code() != TStatusCode::NOT_IMPLEMENTED_ERROR) { + return status; + } + DCHECK(_source_file_col_names.size() == _source_file_col_types.size()); + for (int i = 0; i < _source_file_col_names.size(); ++i) { + _source_file_col_name_types[_source_file_col_names[i]] = _source_file_col_types[i]; } - _cur_reader_eof = false; - break; } return Status::OK(); } +Status FileScanner::prepare_for_read_one_line(const TFileRangeDesc& range) { + _current_range = range; + + RETURN_IF_ERROR(_init_io_ctx()); + _default_val_row_desc.reset(new RowDescriptor((TupleDescriptor*)_real_tuple_desc, false)); + RETURN_IF_ERROR(_init_expr_ctxes()); + + // Since only one column is read from the file, there is no need to filter, so set these variables to empty. 
+ static std::unordered_map colname_to_value_range; + _colname_to_value_range = &colname_to_value_range; + _push_down_conjuncts.clear(); + _not_single_slot_filter_conjuncts.clear(); + _slot_id_to_filter_conjuncts.clear(); + _kv_cache = nullptr; + return Status::OK(); +} + +Status FileScanner::read_one_line_from_range(const TFileRangeDesc& range, + const segment_v2::rowid_t rowid, Block* result_block, + const ExternalFileMappingInfo& external_info, + int64_t* init_reader_ms, int64_t* get_block_ms) { + _current_range = range; + RETURN_IF_ERROR(_generate_parititon_columns()); + + TFileFormatType::type format_type = _get_current_format_type(); + Status init_status = Status::OK(); + + RETURN_IF_ERROR(scope_timer_run( + [&]() -> Status { + switch (format_type) { + case TFileFormatType::FORMAT_PARQUET: { + std::unique_ptr parquet_reader = + vectorized::ParquetReader::create_unique( + _profile, *_params, range, 1, + const_cast(&_state->timezone_obj()), + _io_ctx.get(), _state, + external_info.enable_file_meta_cache + ? 
ExecEnv::GetInstance()->file_meta_cache() + : nullptr, + false); + + RETURN_IF_ERROR(parquet_reader->open()); + RETURN_IF_ERROR(parquet_reader->set_read_lines_mode({rowid})); + RETURN_IF_ERROR(_init_parquet_reader(std::move(parquet_reader))); + break; + } + case TFileFormatType::FORMAT_ORC: { + std::unique_ptr orc_reader = + vectorized::OrcReader::create_unique(_profile, _state, *_params, range, + 1, _state->timezone(), + _io_ctx.get(), false); + + RETURN_IF_ERROR(orc_reader->set_read_lines_mode({rowid})); + RETURN_IF_ERROR(_init_orc_reader(std::move(orc_reader))); + break; + } + default: { + return Status::InternalError( + "Failed to create one line reader for file format: {}," + "only support parquet and orc", + _params->format_type); + } + } + return Status::OK(); + }, + init_reader_ms)); + + RETURN_IF_ERROR(_set_fill_or_truncate_columns(true)); + _cur_reader_eof = false; + + RETURN_IF_ERROR(scope_timer_run( + [&]() -> Status { + bool eof = false; + return _get_block_impl(_state, result_block, &eof); + }, + get_block_ms)); + + RETURN_IF_ERROR(_cur_reader->close()); + return Status::OK(); +} + Status FileScanner::_generate_parititon_columns() { _partition_col_descs.clear(); const TFileRangeDesc& range = _current_range; @@ -1297,6 +1410,11 @@ Status FileScanner::_init_expr_ctxes() { return Status::InternalError( fmt::format("Unknown source slot descriptor, slot_id={}", slot_id)); } + if (it->second->col_name().starts_with(BeConsts::GLOBAL_ROWID_COL)) { + RETURN_IF_ERROR( + _create_row_id_column_iterator(_default_val_row_desc->get_column_id(slot_id))); + continue; + } if (slot_info.is_file_slot) { _file_slot_descs.emplace_back(it->second); _file_col_names.push_back(it->second->col_name()); diff --git a/be/src/vec/exec/scan/file_scanner.h b/be/src/vec/exec/scan/file_scanner.h index 2876745a1b468e..37555884dcd77c 100644 --- a/be/src/vec/exec/scan/file_scanner.h +++ b/be/src/vec/exec/scan/file_scanner.h @@ -37,7 +37,8 @@ #include "vec/common/schema_util.h" #include 
"vec/core/block.h" #include "vec/exec/format/generic_reader.h" -#include "vec/exec/scan/scanner.h" +#include "vec/exec/format/orc/vorc_reader.h" +#include "vec/exec/format/parquet/vparquet_reader.h" #include "vec/exprs/vexpr_fwd.h" namespace doris { @@ -79,6 +80,22 @@ class FileScanner : public Scanner { std::string get_current_scan_range_name() override { return _current_range_path; } + //only used for read one line. + FileScanner(RuntimeState* state, RuntimeProfile* profile, const TFileScanRangeParams* params, + const std::unordered_map* colname_to_slot_id, + TupleDescriptor* tuple_desc) + : Scanner(state, profile), + _params(params), + _col_name_to_slot_id(colname_to_slot_id), + _real_tuple_desc(tuple_desc) {}; + + Status read_one_line_from_range(const TFileRangeDesc& range, const segment_v2::rowid_t rowid, + Block* result_block, + const ExternalFileMappingInfo& external_info, + int64_t* init_reader_ms, int64_t* get_block_ms); + + Status prepare_for_read_one_line(const TFileRangeDesc& range); + protected: Status _get_block_impl(RuntimeState* state, Block* block, bool* eof) override; @@ -206,6 +223,9 @@ class FileScanner : public Scanner { // otherwise, point to _output_tuple_desc const TupleDescriptor* _real_tuple_desc = nullptr; + std::pair, int> _row_id_column_iterator_pair = {nullptr, + -1}; + private: Status _init_expr_ctxes(); Status _init_src_block(Block* block); @@ -226,13 +246,33 @@ class FileScanner : public Scanner { Status _process_conjuncts_for_dict_filter(); Status _process_late_arrival_conjuncts(); void _get_slot_ids(VExpr* expr, std::vector* slot_ids); + Status _generate_truncate_columns(bool need_to_get_parsed_schema); + Status _set_fill_or_truncate_columns(bool need_to_get_parsed_schema); + Status _init_orc_reader(std::unique_ptr&& orc_reader); + Status _init_parquet_reader(std::unique_ptr&& parquet_reader); + Status _create_row_id_column_iterator(const int slot_id); + + TFileFormatType::type _get_current_format_type() { + // for compatibility, 
if format_type is not set in range, use the format type of params + const TFileRangeDesc& range = _current_range; + return range.__isset.format_type ? range.format_type : _params->format_type; + }; + + Status _init_io_ctx() { + _io_ctx.reset(new io::IOContext()); + _io_ctx->query_id = &_state->query_id(); + return Status::OK(); + }; void _reset_counter() { _counter.num_rows_unselected = 0; _counter.num_rows_filtered = 0; } - TPushAggOp::type _get_push_down_agg_type() { return _local_state->get_push_down_agg_type(); } + TPushAggOp::type _get_push_down_agg_type() { + return _local_state == nullptr ? TPushAggOp::type::NONE + : _local_state->get_push_down_agg_type(); + } int64_t _get_push_down_count() { return _local_state->get_push_down_count(); } diff --git a/be/src/vec/exec/scan/olap_scanner.cpp b/be/src/vec/exec/scan/olap_scanner.cpp index 2b1d435fcb5066..06b28915c6e5b6 100644 --- a/be/src/vec/exec/scan/olap_scanner.cpp +++ b/be/src/vec/exec/scan/olap_scanner.cpp @@ -41,6 +41,7 @@ #include "exprs/function_filter.h" #include "io/cache/block_file_cache_profile.h" #include "io/io_common.h" +#include "olap/id_manager.h" #include "olap/inverted_index_profile.h" #include "olap/olap_common.h" #include "olap/olap_tuple.h" @@ -418,6 +419,13 @@ Status OlapScanner::_init_tablet_reader_params( } } + if (tablet_schema->has_global_row_id()) { + auto& id_file_map = _state->get_id_file_map(); + for (auto rs_reader : _tablet_reader_params.rs_splits) { + id_file_map->add_temp_rowset(rs_reader.rs_reader->rowset()); + } + } + return Status::OK(); } diff --git a/be/src/vec/exec/scan/scanner.h b/be/src/vec/exec/scan/scanner.h index a38d755a6d03d9..c682dd7d7583d5 100644 --- a/be/src/vec/exec/scan/scanner.h +++ b/be/src/vec/exec/scan/scanner.h @@ -57,6 +57,12 @@ class Scanner { Scanner(RuntimeState* state, pipeline::ScanLocalStateBase* local_state, int64_t limit, RuntimeProfile* profile); + //only used for FileScanner read one line. 
+ Scanner(RuntimeState* state, RuntimeProfile* profile) + : _state(state), _limit(1), _profile(profile), _total_rf_num(0) { + DorisMetrics::instance()->scanner_cnt->increment(1); + }; + virtual ~Scanner() { SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(_state->query_mem_tracker()); _input_block.clear(); diff --git a/be/test/exec/hash_map/hash_table_method_test.cpp b/be/test/exec/hash_map/hash_table_method_test.cpp index 31a1842112335f..5ca6a6cca36281 100644 --- a/be/test/exec/hash_map/hash_table_method_test.cpp +++ b/be/test/exec/hash_map/hash_table_method_test.cpp @@ -27,7 +27,7 @@ namespace doris::vectorized { template -void test_insert(HashMethodType& method, ColumnPtrs column) { +void test_insert(HashMethodType& method, Columns column) { using State = typename HashMethodType::State; ColumnRawPtrs key_raw_columns; for (auto column : column) { @@ -49,8 +49,7 @@ void test_insert(HashMethodType& method, ColumnPtrs column) { } template -void test_find(HashMethodType& method, ColumnPtrs column, - const std::vector& except_result) { +void test_find(HashMethodType& method, Columns column, const std::vector& except_result) { using State = typename HashMethodType::State; ColumnRawPtrs key_raw_columns; for (auto column : column) { diff --git a/be/test/olap/id_manager_test.cpp b/be/test/olap/id_manager_test.cpp new file mode 100644 index 00000000000000..468ddf16f0f10b --- /dev/null +++ b/be/test/olap/id_manager_test.cpp @@ -0,0 +1,154 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/id_manager.h" + +#include + +#include +#include +#include +#include + +#include "olap/olap_common.h" + +using namespace doris; + +// Registers one internal and one external mapping, expects ids 0 and 1, reads both back by id, and expects nullptr for an unknown id. +TEST(IdFileMapTest, BasicOperations) { + IdFileMap id_file_map(1024); + + // Test adding a file mapping + + int64_t tablet_id = 1; + RowsetId rowset_id; + rowset_id.init(2); + uint32_t segment_id = 3; + + auto mapping1 = std::make_shared(tablet_id, rowset_id, segment_id); + uint32_t id1 = id_file_map.get_file_mapping_id(mapping1); + EXPECT_EQ(id1, 0); + + TFileRangeDesc scan_range; + scan_range.path = "https:a/b/c/d/a.parquet"; + scan_range.start_offset = 120; + + auto mapping2 = std::make_shared(2, scan_range, false); + uint32_t id2 = id_file_map.get_file_mapping_id(mapping2); + EXPECT_EQ(id2, 1); + + // Test getting a file mapping + auto retrieved_mapping1 = id_file_map.get_file_mapping(id1); + EXPECT_EQ(retrieved_mapping1->type, FileMappingType::INTERNAL); + std::cout << retrieved_mapping1->file_mapping_info_to_string() << "\n"; + auto internal = retrieved_mapping1->file_mapping_info_to_string(); + + // The internal mapping string is decoded field by field at increasing offsets: tablet_id, then rowset_id, then segment_id. + int64_t tablet_id_ans = 0; + memcpy(&tablet_id_ans, internal.data(), sizeof(tablet_id_ans)); + EXPECT_EQ(tablet_id_ans, tablet_id); + + RowsetId rowset_id_ans; + memcpy(&rowset_id_ans, internal.data() + sizeof(tablet_id_ans), sizeof(rowset_id_ans)); + EXPECT_EQ(rowset_id_ans, rowset_id); + + uint32_t segment_id_ans = 0; + memcpy(&segment_id_ans, internal.data() + sizeof(tablet_id_ans) + sizeof(rowset_id_ans), + sizeof(segment_id_ans)); + EXPECT_EQ(segment_id_ans, segment_id); + + auto retrieved_mapping2
= id_file_map.get_file_mapping(id2); + EXPECT_EQ(retrieved_mapping2->type, FileMappingType::EXTERNAL); + auto str = retrieved_mapping2->file_mapping_info_to_string(); + EXPECT_TRUE(str.find(scan_range.path) != str.npos); + + // Test getting a non-existent file mapping + auto retrieved_mapping3 = id_file_map.get_file_mapping(999); + EXPECT_EQ(retrieved_mapping3, nullptr); +} + +// Ten threads each register 100 mappings (segment_id = i * 1000 + j) and read every one back through the id they were given. +TEST(IdFileMapTest, ConcurrentAddAndGet) { + IdFileMap id_file_map(1024); + std::vector threads; + + int64_t tablet_id = 1; + RowsetId rowset_id; + rowset_id.init(2); + + for (int i = 0; i < 10; ++i) { + threads.emplace_back([i, &id_file_map, &tablet_id, &rowset_id]() { + for (int j = 0; j < 100; ++j) { + uint32_t segment_id = i * 1000 + j; + + auto mapping = std::make_shared( + FileMapping {tablet_id, rowset_id, segment_id}); + uint32_t id = id_file_map.get_file_mapping_id(mapping); + auto retrieved_mapping = id_file_map.get_file_mapping(id); + EXPECT_EQ(retrieved_mapping->type, mapping->type); + EXPECT_EQ(retrieved_mapping->file_mapping_info_to_string(), + mapping->file_mapping_info_to_string()); + } + }); + } + + for (auto& thread : threads) { + thread.join(); + } +} + +// Expects add_id_file_map to return the same map for a repeated query id, and a different map once the id has been removed. +TEST(IdManagerTest, BasicOperations) { + IdManager id_manager; + + // Test adding an IdFileMap + UniqueId query_id1 = UniqueId::gen_uid(); + auto id_file_map1 = id_manager.add_id_file_map(query_id1, 1024); + EXPECT_NE(id_file_map1, nullptr); + + UniqueId query_id2 = UniqueId::gen_uid(); + auto id_file_map2 = id_manager.add_id_file_map(query_id2, 1024); + EXPECT_NE(id_file_map2, nullptr); + + // Test getting an existing IdFileMap + auto retrieved_id_file_map1 = id_manager.add_id_file_map(query_id1, 1024); + EXPECT_EQ(retrieved_id_file_map1, id_file_map1); + + // Test removing an IdFileMap + id_manager.remove_id_file_map(query_id1); + auto retrieved_id_file_map2 = id_manager.add_id_file_map(query_id1, 1024); + EXPECT_NE(retrieved_id_file_map2, id_file_map1); +} + +// Ten threads each run ten add / remove / add cycles, every cycle on a freshly generated query id. +TEST(IdManagerTest, ConcurrentAddAndRemove) { + IdManager
id_manager; + std::vector threads; + + for (int i = 0; i < 10; ++i) { + threads.emplace_back([&]() { + for (int j = 0; j < 10; ++j) { + UniqueId query_id = UniqueId::gen_uid(); + auto id_file_map = id_manager.add_id_file_map(query_id, 1024); + EXPECT_NE(id_file_map, nullptr); + + id_manager.remove_id_file_map(query_id); + auto retrieved_id_file_map = id_manager.add_id_file_map(query_id, 1024); + EXPECT_NE(retrieved_id_file_map, id_file_map); + } + }); + } + + for (auto& thread : threads) { + thread.join(); + } +} diff --git a/be/test/pipeline/operator/materialization_shared_state_test.cpp b/be/test/pipeline/operator/materialization_shared_state_test.cpp new file mode 100644 index 00000000000000..4299d4a690c3f5 --- /dev/null +++ b/be/test/pipeline/operator/materialization_shared_state_test.cpp @@ -0,0 +1,343 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
+ +#include + +#include "pipeline/dependency.h" +#include "vec/columns/column_vector.h" +#include "vec/core/field.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_string.h" + +namespace doris::pipeline { + +// Fixture: SetUp builds a shared state whose origin block has a _string_type "rowid" column and an _int_type "value" column, marks column 0 as the rowid column, and adds one request block desc for each of two backend ids (1001 and 1002). +class MaterializationSharedStateTest : public testing::Test { +protected: + void SetUp() override { + _shared_state = std::make_shared(); + + // Setup test data types + _string_type = std::make_shared(); + _int_type = std::make_shared(); + + // Create origin block with rowid column (ColumnString type) + _shared_state->origin_block = vectorized::Block(); + _shared_state->origin_block.insert({_string_type->create_column(), _string_type, "rowid"}); + _shared_state->origin_block.insert({_int_type->create_column(), _int_type, "value"}); + + // Add rowid location + _shared_state->rowid_locs = {0}; // First column is rowid + + // Setup RPC structs for two backends + _backend_id1 = 1001; + _backend_id2 = 1002; + _shared_state->rpc_struct_map[_backend_id1] = FetchRpcStruct(); + _shared_state->rpc_struct_map[_backend_id2] = FetchRpcStruct(); + _shared_state->rpc_struct_map[_backend_id1].request.add_request_block_descs(); + _shared_state->rpc_struct_map[_backend_id2].request.add_request_block_descs(); + } + + std::shared_ptr _shared_state; + std::shared_ptr _string_type; + std::shared_ptr _int_type; + int64_t _backend_id1; + int64_t _backend_id2; +}; + +TEST_F(MaterializationSharedStateTest, TestCreateSourceDependency) { + // Test creating source dependencies + int test_op_id = 100; + int test_node_id = 200; + std::string test_name = "TEST"; + + auto* dep = _shared_state->create_source_dependency(test_op_id, test_node_id, test_name); + + // Verify the dependency was created correctly + ASSERT_NE(dep, nullptr); + EXPECT_EQ(dep->id(), test_op_id); + EXPECT_EQ(dep->name(), test_name + "_DEPENDENCY"); + + // Verify it was added to source_deps + EXPECT_EQ(_shared_state->source_deps.size(), 1); + EXPECT_EQ(_shared_state->source_deps[0].get(),
dep); +} + +TEST_F(MaterializationSharedStateTest, TestCreateMultiGetResult) { + // Create test columns for rowids + vectorized::Columns columns; + auto rowid_col = _string_type->create_column(); + auto* col_data = reinterpret_cast(rowid_col.get()); + + // Create test GlobalRowLoacationV2 data + GlobalRowLoacationV2 loc1(0, _backend_id1, 1, 1); + GlobalRowLoacationV2 loc2(0, _backend_id2, 2, 2); + + col_data->insert_data(reinterpret_cast(&loc1), sizeof(GlobalRowLoacationV2)); + col_data->insert_data(reinterpret_cast(&loc2), sizeof(GlobalRowLoacationV2)); + columns.push_back(std::move(rowid_col)); + + // Test creating multiget result + Status st = _shared_state->create_muiltget_result(columns, true, true); + EXPECT_TRUE(st.ok()); + + // Verify block_order_results + EXPECT_EQ(_shared_state->block_order_results.size(), columns.size()); + EXPECT_EQ(_shared_state->last_block, true); +} + +TEST_F(MaterializationSharedStateTest, TestMergeMultiResponse) { + // 1. Setup origin block with nullable rowid column + auto nullable_rowid_col = vectorized::ColumnNullable::create(_string_type->create_column(), + vectorized::ColumnUInt8::create()); + nullable_rowid_col->insert_data((char*)&nullable_rowid_col, 4); + nullable_rowid_col->insert_data(nullptr, 4); + nullable_rowid_col->insert_data((char*)&nullable_rowid_col, 4); + + auto value_col = _int_type->create_column(); + value_col->insert(vectorized::Field::create_field(100)); + value_col->insert(vectorized::Field::create_field(101)); + value_col->insert(vectorized::Field::create_field(200)); + + // Add test data to origin block + _shared_state->origin_block = vectorized::Block( + {{std::move(nullable_rowid_col), vectorized::make_nullable(_string_type), "rowid"}, + {std::move(value_col), _int_type, "value"}}); + + // Set rowid column location + _shared_state->rowid_locs = {0}; + _shared_state->response_blocks = std::vector(1); + + // 2.
Setup response blocks from multiple backends + // Backend 1's response + { + vectorized::Block resp_block1; + auto resp_value_col1 = _int_type->create_column(); + auto* value_col_data1 = + reinterpret_cast*>(resp_value_col1.get()); + value_col_data1->insert(vectorized::Field::create_field(100)); + value_col_data1->insert(vectorized::Field::create_field(101)); + resp_block1.insert( + {make_nullable(std::move(resp_value_col1)), make_nullable(_int_type), "value"}); + + auto callback1 = std::make_shared>(); + callback1->response_.reset(new PMultiGetResponseV2()); + auto serialized_block = callback1->response_->add_blocks()->mutable_block(); + size_t uncompressed_size = 0; + size_t compressed_size = 0; + auto s = resp_block1.serialize(0, serialized_block, &uncompressed_size, &compressed_size, + CompressionTypePB::LZ4); + EXPECT_TRUE(s.ok()); + + _shared_state->rpc_struct_map[_backend_id1].callback = callback1; + // init the response blocks + _shared_state->response_blocks[0] = resp_block1.clone_empty(); + } + + // Backend 2's response + { + vectorized::Block resp_block2; + auto resp_value_col2 = _int_type->create_column(); + auto* value_col_data2 = + reinterpret_cast*>(resp_value_col2.get()); + value_col_data2->insert(vectorized::Field::create_field(200)); + resp_block2.insert( + {make_nullable(std::move(resp_value_col2)), make_nullable(_int_type), "value"}); + + auto callback2 = std::make_shared>(); + callback2->response_.reset(new PMultiGetResponseV2()); + auto serialized_block = callback2->response_->add_blocks()->mutable_block(); + + size_t uncompressed_size = 0; + size_t compressed_size = 0; + auto s = resp_block2.serialize(0, serialized_block, &uncompressed_size, &compressed_size, + CompressionTypePB::LZ4); + EXPECT_TRUE(s.ok()); + + _shared_state->rpc_struct_map[_backend_id2].callback = callback2; + } + + // 3.
Setup block order results to control merge order + _shared_state->block_order_results = { + {_backend_id1, 0, _backend_id2} // First block order: BE1,null,BE2 (0 is the entry for the row whose rowid is null) + }; + + // 4. Test merging responses + vectorized::Block result_block; + Status st = _shared_state->merge_multi_response(&result_block); + EXPECT_TRUE(st.ok()); + + // 5. Verify merged result + EXPECT_EQ(result_block.columns(), 2); // Should have original rowid column and value column + EXPECT_EQ(result_block.rows(), 3); // Total 3 rows from both backends + + // Verify the value column data is merged in correct order + auto* merged_value_col = result_block.get_by_position(0).column.get(); + EXPECT_EQ(*((int*)merged_value_col->get_data_at(0).data), 100); // First value from BE1 + EXPECT_EQ(merged_value_col->get_data_at(1).data, + nullptr); // Second row has a null rowid, so a null value is expected + EXPECT_EQ(*((int*)merged_value_col->get_data_at(2).data), 200); // Third value from BE2 +} + +TEST_F(MaterializationSharedStateTest, TestMergeMultiResponseMultiBlocks) { + // 1.
Setup origin block with multiple nullable rowid columns + auto nullable_rowid_col1 = vectorized::ColumnNullable::create( + _string_type->create_column(), vectorized::ColumnUInt8::create()); + nullable_rowid_col1->insert_data((char*)&nullable_rowid_col1, 4); + nullable_rowid_col1->insert_data(nullptr, 4); + nullable_rowid_col1->insert_data((char*)&nullable_rowid_col1, 4); + + auto nullable_rowid_col2 = vectorized::ColumnNullable::create( + _string_type->create_column(), vectorized::ColumnUInt8::create()); + nullable_rowid_col2->insert_data((char*)&nullable_rowid_col2, 4); + nullable_rowid_col2->insert_data((char*)&nullable_rowid_col2, 4); + nullable_rowid_col2->insert_data(nullptr, 4); + + auto value_col1 = _int_type->create_column(); + value_col1->insert(vectorized::Field::create_field(100)); + value_col1->insert(vectorized::Field::create_field(101)); + value_col1->insert(vectorized::Field::create_field(102)); + + auto value_col2 = _int_type->create_column(); + value_col2->insert(vectorized::Field::create_field(200)); + value_col2->insert(vectorized::Field::create_field(201)); + value_col2->insert(vectorized::Field::create_field(202)); + + // Add test data to origin block with multiple columns + _shared_state->origin_block = vectorized::Block( + {{std::move(nullable_rowid_col1), vectorized::make_nullable(_string_type), "rowid1"}, + {std::move(nullable_rowid_col2), vectorized::make_nullable(_string_type), "rowid2"}, + {std::move(value_col1), _int_type, "value1"}, + {std::move(value_col2), _int_type, "value2"}}); + + // Set multiple rowid column locations + _shared_state->rowid_locs = {0, 1}; + _shared_state->response_blocks = std::vector(2); + + // 2.
Setup response blocks from multiple backends for first rowid + { + vectorized::Block resp_block1; + auto resp_value_col1 = _int_type->create_column(); + auto* value_col_data1 = + reinterpret_cast*>(resp_value_col1.get()); + value_col_data1->insert(vectorized::Field::create_field(100)); + resp_block1.insert( + {make_nullable(std::move(resp_value_col1)), make_nullable(_int_type), "value1"}); + + auto callback1 = std::make_shared>(); + callback1->response_.reset(new PMultiGetResponseV2()); + auto serialized_block = callback1->response_->add_blocks()->mutable_block(); + size_t uncompressed_size = 0; + size_t compressed_size = 0; + auto s = resp_block1.serialize(0, serialized_block, &uncompressed_size, &compressed_size, + CompressionTypePB::LZ4); + EXPECT_TRUE(s.ok()); + + _shared_state->rpc_struct_map[_backend_id1].callback = callback1; + _shared_state->response_blocks[0] = resp_block1.clone_empty(); + } + + // Backend 2's response for first rowid + { + vectorized::Block resp_block2; + auto resp_value_col2 = _int_type->create_column(); + auto* value_col_data2 = + reinterpret_cast*>(resp_value_col2.get()); + value_col_data2->insert(vectorized::Field::create_field(102)); + resp_block2.insert( + {make_nullable(std::move(resp_value_col2)), make_nullable(_int_type), "value1"}); + + auto callback2 = std::make_shared>(); + callback2->response_.reset(new PMultiGetResponseV2()); + auto serialized_block = callback2->response_->add_blocks()->mutable_block(); + size_t uncompressed_size = 0; + size_t compressed_size = 0; + auto s = resp_block2.serialize(0, serialized_block, &uncompressed_size, &compressed_size, + CompressionTypePB::LZ4); + EXPECT_TRUE(s.ok()); + + _shared_state->rpc_struct_map[_backend_id2].callback = callback2; + } + + // Add second block responses for second rowid + { + vectorized::Block resp_block1; + auto resp_value_col1 = _int_type->create_column(); + auto* value_col_data1 = + reinterpret_cast*>(resp_value_col1.get()); +
value_col_data1->insert(vectorized::Field::create_field(200)); + resp_block1.insert( + {make_nullable(std::move(resp_value_col1)), make_nullable(_int_type), "value2"}); + + auto serialized_block = _shared_state->rpc_struct_map[_backend_id1] + .callback->response_->add_blocks() + ->mutable_block(); + size_t uncompressed_size = 0; + size_t compressed_size = 0; + auto s = resp_block1.serialize(0, serialized_block, &uncompressed_size, &compressed_size, + CompressionTypePB::LZ4); + EXPECT_TRUE(s.ok()); + _shared_state->response_blocks[1] = resp_block1.clone_empty(); + } + + { + vectorized::Block resp_block2; + auto resp_value_col2 = _int_type->create_column(); + auto* value_col_data2 = + reinterpret_cast*>(resp_value_col2.get()); + value_col_data2->insert(vectorized::Field::create_field(201)); + resp_block2.insert( + {make_nullable(std::move(resp_value_col2)), make_nullable(_int_type), "value2"}); + + auto serialized_block = _shared_state->rpc_struct_map[_backend_id2] + .callback->response_->add_blocks() + ->mutable_block(); + size_t uncompressed_size = 0; + size_t compressed_size = 0; + auto s = resp_block2.serialize(0, serialized_block, &uncompressed_size, &compressed_size, + CompressionTypePB::LZ4); + EXPECT_TRUE(s.ok()); + } + + // 3. Setup block order results for both rowids + _shared_state->block_order_results = { + {_backend_id1, 0, _backend_id2}, // First block order: BE1,null,BE2 + {_backend_id1, _backend_id2, 0} // Second block order: BE1,BE2,null + }; + + // 4. Test merging responses + vectorized::Block result_block; + Status st = _shared_state->merge_multi_response(&result_block); + EXPECT_TRUE(st.ok()); + + // 5.
Verify merged result + EXPECT_EQ(result_block.columns(), 4); // Should have two rowid columns and two value columns + EXPECT_EQ(result_block.rows(), 3); // Total 3 rows from both backends + + // Verify the first value column data is merged in correct order + auto* merged_value_col1 = result_block.get_by_position(0).column.get(); + EXPECT_EQ(*((int*)merged_value_col1->get_data_at(0).data), 100); + EXPECT_EQ(merged_value_col1->get_data_at(1).data, nullptr); + EXPECT_EQ(*((int*)merged_value_col1->get_data_at(2).data), 102); + + // Verify the second value column data is merged in correct order + auto* merged_value_col2 = result_block.get_by_position(1).column.get(); + EXPECT_EQ(*((int*)merged_value_col2->get_data_at(0).data), 200); + EXPECT_EQ(*((int*)merged_value_col2->get_data_at(1).data), 201); + EXPECT_EQ(merged_value_col2->get_data_at(2).data, nullptr); +} + +} // namespace doris::pipeline diff --git a/be/test/testutil/mock/mock_in_expr.cpp b/be/test/testutil/mock/mock_in_expr.cpp index e69cb2832c977b..0b09eb95d1c6f9 100644 --- a/be/test/testutil/mock/mock_in_expr.cpp +++ b/be/test/testutil/mock/mock_in_expr.cpp @@ -24,6 +24,7 @@ #include "testutil/column_helper.h" #include "testutil/mock/mock_descriptors.h" #include "testutil/mock/mock_runtime_state.h" +#include "vec/exprs/vexpr_context.h" #include "vec/functions/in.h" namespace doris::vectorized { diff --git a/be/test/testutil/run_all_tests.cpp b/be/test/testutil/run_all_tests.cpp index 60fc3ce35349ee..241a4c44d37bd1 100644 --- a/be/test/testutil/run_all_tests.cpp +++ b/be/test/testutil/run_all_tests.cpp @@ -38,6 +38,7 @@ #include "util/cpu_info.h" #include "util/disk_info.h" #include "util/mem_info.h" +#include "vec/exec/format/orc/orc_memory_pool.h" int main(int argc, char** argv) { SCOPED_INIT_THREAD_CONTEXT(); @@ -71,6 +72,8 @@ int main(int argc, char** argv) { doris::ExecEnv::GetInstance()->set_tablet_column_object_pool( doris::TabletColumnObjectPool::create_global_column_cache(
doris::config::tablet_schema_cache_capacity)); + doris::ExecEnv::GetInstance()->set_orc_memory_pool(new doris::vectorized::ORCMemoryPool()); + LOG(INFO) << "init config " << st; doris::Status s = doris::config::set_config("enable_stacktrace", "false"); if (!s.ok()) { diff --git a/be/test/vec/exec/format/parquet/parquet_read_lines.cpp b/be/test/vec/exec/format/parquet/parquet_read_lines.cpp new file mode 100644 index 00000000000000..4b95b59abcae25 --- /dev/null +++ b/be/test/vec/exec/format/parquet/parquet_read_lines.cpp @@ -0,0 +1,350 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
+ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "common/object_pool.h" +#include "gtest/gtest_pred_impl.h" +#include "io/fs/local_file_system.h" +#include "orc/sargs/SearchArgument.hh" +#include "runtime/define_primitive_type.h" +#include "runtime/descriptors.h" +#include "runtime/exec_env.h" +#include "runtime/runtime_state.h" +#include "util/timezone_utils.h" +#include "vec/columns/column.h" +#include "vec/core/block.h" +#include "vec/core/column_with_type_and_name.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_factory.hpp" +#include "vec/exec/format/orc/vorc_reader.h" +#include "vec/exec/format/parquet/vparquet_reader.h" +#include "vec/exec/scan/file_scanner.h" +#include "vec/exprs/vexpr_context.h" + +namespace doris { +namespace vectorized { +class VExprContext; + +class ParquetReadLinesTest : public testing::Test { +public: + ParquetReadLinesTest() {} +}; + +// Shared driver for the tests below. numeric_types holds the column names, types the matching primitive types, read_lines the row numbers to fetch and block_dump the expected Block::dump_data() text. +// Pass 1: a ParquetReader in read-lines mode reads type-decoder.parquet; every entry of the extra "row_id" column is checked (file_id 10, row_id equal to the requested line, backend id, version) and the dump is compared with block_dump. +// Pass 2: FileScanner::read_one_line_from_range is called once per requested line on the same block, and the dump is compared with block_dump again. +static void read_parquet_lines(std::vector numeric_types, + std::vector types, + std::list read_lines, String block_dump) { + TDescriptorTable t_desc_table; + TTableDescriptor t_table_desc; + + t_table_desc.id = 0; + t_table_desc.tableType = TTableType::OLAP_TABLE; + t_table_desc.numCols = 0; + t_table_desc.numClusteringCols = 0; + t_desc_table.tableDescriptors.push_back(t_table_desc); + t_desc_table.__isset.tableDescriptors = true; + + for (int i = 0; i < numeric_types.size(); i++) { + TSlotDescriptor tslot_desc; + { + tslot_desc.id = i; + tslot_desc.parent = 0; + TTypeDesc type; + { + TTypeNode node; + node.__set_type(TTypeNodeType::SCALAR); + TScalarType scalar_type; + scalar_type.__set_type(types[i]); + node.__set_scalar_type(scalar_type); + type.types.push_back(node); + } + tslot_desc.slotType = type; + tslot_desc.columnPos = 0; + tslot_desc.byteOffset = 0; + tslot_desc.nullIndicatorByte = 0; + tslot_desc.nullIndicatorBit = -1; + tslot_desc.colName =
numeric_types[i]; + tslot_desc.slotIdx = 0; + tslot_desc.isMaterialized = true; + t_desc_table.slotDescriptors.push_back(tslot_desc); + } + } + + t_desc_table.__isset.slotDescriptors = true; + { + // TTupleDescriptor dest + TTupleDescriptor t_tuple_desc; + t_tuple_desc.id = 0; + t_tuple_desc.byteSize = 16; + t_tuple_desc.numNullBytes = 0; + t_tuple_desc.tableId = 0; + t_tuple_desc.__isset.tableId = true; + t_desc_table.tupleDescriptors.push_back(t_tuple_desc); + } + DescriptorTbl* desc_tbl; + ObjectPool obj_pool; + static_cast(DescriptorTbl::create(&obj_pool, t_desc_table, &desc_tbl)); + + auto slot_descs = desc_tbl->get_tuple_descriptor(0)->slots(); + auto local_fs = io::global_local_filesystem(); + io::FileReaderSPtr reader; + static_cast( + local_fs->open_file("./be/test/exec/test_data/" + "parquet_scanner/type-decoder.parquet", + &reader)); + + cctz::time_zone ctz; + TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, ctz); + auto tuple_desc = desc_tbl->get_tuple_descriptor(0); + std::vector column_names; + std::vector missing_column_names; + for (int i = 0; i < slot_descs.size(); i++) { + column_names.push_back(slot_descs[i]->col_name()); + } + TFileScanRangeParams scan_params; + TFileRangeDesc scan_range; + { + scan_range.start_offset = 0; + scan_range.size = 1000; + } + auto p_reader = + new ParquetReader(nullptr, scan_params, scan_range, 992, &ctz, nullptr, nullptr); + std::pair, int> iterator_pair; + iterator_pair = + std::make_pair(std::make_shared( + IdManager::ID_VERSION, BackendOptions::get_backend_id(), 10), + tuple_desc->slots().size()); + p_reader->set_row_id_column_iterator(iterator_pair); + p_reader->set_file_reader(reader); + static_cast(p_reader->set_read_lines_mode(read_lines)); + + RuntimeState runtime_state((TQueryGlobals())); + runtime_state.set_desc_tbl(desc_tbl); + + std::unordered_map colname_to_value_range; + static_cast(p_reader->open()); + static_cast(p_reader->init_reader(column_names, missing_column_names, nullptr,
{}, + nullptr, nullptr, nullptr, nullptr, nullptr)); + std::unordered_map> + partition_columns; + std::unordered_map missing_columns; + static_cast(p_reader->set_fill_columns(partition_columns, missing_columns)); + BlockUPtr block = Block::create_unique(); + for (const auto& slot_desc : tuple_desc->slots()) { + auto data_type = make_nullable(slot_desc->type()); + MutableColumnPtr data_column = data_type->create_column(); + block->insert( + ColumnWithTypeAndName(std::move(data_column), data_type, slot_desc->col_name())); + } + + auto data_type = vectorized::DataTypeFactory::instance().create_data_type( + PrimitiveType::TYPE_VARCHAR, false); + block->insert(ColumnWithTypeAndName(data_type->create_column()->assume_mutable(), data_type, + "row_id")); + + bool eof = false; + size_t read_row = 0; + static_cast(p_reader->get_next_block(block.get(), &read_row, &eof)); + auto row_id_string_column = + static_cast(*block->get_by_name("row_id").column.get()); + auto read_lines_tmp = read_lines; + for (auto i = 0; i < row_id_string_column.size(); i++) { + GlobalRowLoacationV2 info = + *((GlobalRowLoacationV2*)row_id_string_column.get_data_at(i).data); + EXPECT_EQ(info.file_id, 10); + EXPECT_EQ(info.row_id, read_lines_tmp.front()); + read_lines_tmp.pop_front(); + EXPECT_EQ(info.backend_id, BackendOptions::get_backend_id()); + EXPECT_EQ(info.version, IdManager::ID_VERSION); + } + block->erase("row_id"); + + EXPECT_EQ(block->dump_data(), block_dump); + std::cout << block->dump_data(); + EXPECT_TRUE(eof); + delete p_reader; + + scan_params.file_type = TFileType::FILE_LOCAL; + scan_range.path = + "./be/test/exec/test_data/parquet_scanner/" + "type-decoder.parquet"; + scan_range.start_offset = 0; + scan_range.format_type = TFileFormatType::FORMAT_PARQUET; + scan_range.__isset.format_type = true; + std::unordered_map colname_to_slot_id; + for (auto slot : tuple_desc->slots()) { + TFileScanSlotInfo slot_info; + slot_info.slot_id = slot->id(); + slot_info.is_file_slot = true; +
scan_params.required_slots.emplace_back(slot_info); + } + runtime_state._timezone = "CST"; + + std::unique_ptr runtime_profile; + runtime_profile = std::make_unique("ExternalRowIDFetcher"); + + auto vf = FileScanner::create_unique(&runtime_state, runtime_profile.get(), &scan_params, + &colname_to_slot_id, tuple_desc); + EXPECT_TRUE(vf->prepare_for_read_one_line(scan_range).ok()); + ExternalFileMappingInfo external_info(0, scan_range, false); + int64_t init_reader_ms = 0; + int64_t get_block_ms = 0; + + auto read_lines_tmp2 = read_lines; + while (!read_lines_tmp2.empty()) { + auto st = vf->read_one_line_from_range(scan_range, read_lines_tmp2.front(), block.get(), + external_info, &init_reader_ms, &get_block_ms); + std::cout << st.to_string() << "\n"; + EXPECT_TRUE(st.ok()); + + read_lines_tmp2.pop_front(); + } + EXPECT_EQ(block->dump_data(read_lines.size()), block_dump); +} + +// Seven numeric/boolean columns, lines 1, 5 and 7. +TEST_F(ParquetReadLinesTest, test0) { + std::vector numeric_types = {"boolean_col", "tinyint_col", "smallint_col", + "int_col", "bigint_col", "float_col", + "double_col"}; + std::vector types = {TPrimitiveType::BOOLEAN, TPrimitiveType::TINYINT, + TPrimitiveType::SMALLINT, TPrimitiveType::INT, + TPrimitiveType::BIGINT, TPrimitiveType::FLOAT, + TPrimitiveType::DOUBLE}; + std::list read_lines {1, 5, 7}; + std::string block_dump = + "+----------------------------+---------------------------+----------------------------" + "-+------------------------+---------------------------+----------------------------+--" + "---------------------------+\n" + "|boolean_col(Nullable(UInt8))|tinyint_col(Nullable(Int8))|smallint_col(Nullable(Int16)" + ")|int_col(Nullable(Int32))|bigint_col(Nullable(Int64))|float_col(Nullable(Float32))|" + "double_col(Nullable(Float64))|\n" + "+----------------------------+---------------------------+----------------------------" + "-+------------------------+---------------------------+----------------------------+--" + "---------------------------+\n" + "| 1| 2| " + "2| 2| 2|
2.14| " + " 2.14|\n" + "| 0| 6| " + "6| 6| 6| 6.14| " + " 6.14|\n" + "| 0| 8| " + "8| 8| 8| 8.14| " + " 8.14|\n" + "+----------------------------+---------------------------+----------------------------" + "-+------------------------+---------------------------+----------------------------+--" + "---------------------------+\n"; + read_parquet_lines(numeric_types, types, read_lines, block_dump); +} + +// Three columns (boolean, tinyint, float), lines 2 and 6. +TEST_F(ParquetReadLinesTest, test1) { + std::vector numeric_types = {"boolean_col", "tinyint_col", "float_col"}; + std::vector types = {TPrimitiveType::BOOLEAN, TPrimitiveType::TINYINT, + TPrimitiveType::FLOAT}; + std::list read_lines {2, 6}; + std::string block_dump = + "+----------------------------+---------------------------+----------------------------" + "+\n" + "|boolean_col(Nullable(UInt8))|tinyint_col(Nullable(Int8))|float_col(Nullable(Float32))" + "|\n" + "+----------------------------+---------------------------+----------------------------" + "+\n" + "| 0| -3| " + "-3.14|\n" + "| 1| -7| " + "-7.14|\n" + "+----------------------------+---------------------------+----------------------------" + "+\n"; + read_parquet_lines(numeric_types, types, read_lines, block_dump); +} + +// Columns requested in the order double, int, float; lines 1, 4 and 9. +TEST_F(ParquetReadLinesTest, test2) { + std::vector numeric_types = {"double_col", "int_col", "float_col"}; + std::vector types = {TPrimitiveType::DOUBLE, TPrimitiveType::INT, + TPrimitiveType::FLOAT}; + std::list read_lines {1, 4, 9}; + string block_dump = + "+-----------------------------+------------------------+----------------------------+" + "\n" + "|double_col(Nullable(Float64))|int_col(Nullable(Int32))|float_col(Nullable(Float32))|" + "\n" + "+-----------------------------+------------------------+----------------------------+" + "\n" + "| 2.14| 2| " + "2.14|\n" + "| -5.14| -5| " + "-5.14|\n" + "| 10.14| 10| " + "10.14|\n" + "+-----------------------------+------------------------+----------------------------+" + "\n"; + read_parquet_lines(numeric_types, types, read_lines,
block_dump); +} + +// Same three columns as test2, lines 3, 6 and 8. +TEST_F(ParquetReadLinesTest, test3) { + std::vector numeric_types = {"double_col", "int_col", "float_col"}; + std::vector types = {TPrimitiveType::DOUBLE, TPrimitiveType::INT, + TPrimitiveType::FLOAT}; + std::list read_lines {3, 6, 8}; + std::string block_dump = + "+-----------------------------+------------------------+----------------------------+" + "\n" + "|double_col(Nullable(Float64))|int_col(Nullable(Int32))|float_col(Nullable(Float32))|" + "\n" + "+-----------------------------+------------------------+----------------------------+" + "\n" + "| 4.14| 4| " + "4.14|\n" + "| -7.14| -7| " + "-7.14|\n" + "| -9.14| -9| " + "-9.14|\n" + "+-----------------------------+------------------------+----------------------------+" + "\n"; + read_parquet_lines(numeric_types, types, read_lines, block_dump); +} + +// Two string-typed columns, lines 3, 6 and 8; the expected dump has NULL for string_col on the first requested line. +TEST_F(ParquetReadLinesTest, test4) { + std::vector numeric_types = {"string_col", "char_col"}; + std::vector types = {TPrimitiveType::STRING, TPrimitiveType::STRING}; + std::list read_lines {3, 6, 8}; + std::string block_dump = + "+----------------------------+--------------------------+\n" + "|string_col(Nullable(String))|char_col(Nullable(String))|\n" + "+----------------------------+--------------------------+\n" + "| NULL| c-row3|\n" + "| s-row6| c-row6|\n" + "| s-row8| c-row8|\n" + "+----------------------------+--------------------------+\n"; + read_parquet_lines(numeric_types, types, read_lines, block_dump); +} + +} // namespace vectorized +} // namespace doris diff --git a/be/test/vec/exec/orc/orc_read_lines.cpp b/be/test/vec/exec/orc/orc_read_lines.cpp new file mode 100644 index 00000000000000..edc078e459b86d --- /dev/null +++ b/be/test/vec/exec/orc/orc_read_lines.cpp @@ -0,0 +1,391 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership.
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "common/object_pool.h" +#include "gtest/gtest_pred_impl.h" +#include "io/fs/local_file_system.h" +#include "runtime/define_primitive_type.h" +#include "runtime/descriptors.h" +#include "runtime/exec_env.h" +#include "runtime/runtime_state.h" +#include "testutil/desc_tbl_builder.h" +#include "vec/columns/column.h" +#include "vec/core/block.h" +#include "vec/core/column_with_type_and_name.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_factory.hpp" +#include "vec/exec/format/orc/orc_memory_pool.h" +#include "vec/exec/format/orc/vorc_reader.h" +#include "vec/exec/format/parquet/vparquet_reader.h" +#include "vec/exec/scan/file_scanner.h" +#include "vec/exprs/vexpr_context.h" + +namespace doris { +namespace vectorized { +class VExprContext; + +class OrcReadLinesTest : public testing::Test { +public: + OrcReadLinesTest() {} +}; + +static void read_orc_line(int64_t line, std::string block_dump) { + auto runtime_state = RuntimeState::create_unique(); + + std::vector column_names = {"col1", "col2", "col3", "col4", "col5", + "col6", "col7", "col8", "col9"}; + ObjectPool object_pool; + DescriptorTblBuilder builder(&object_pool); + builder.declare_tuple() << std::make_tuple( + 
vectorized::DataTypeFactory::instance().create_data_type( + PrimitiveType::TYPE_BIGINT, true), + "col1") + << std::make_tuple( + vectorized::DataTypeFactory::instance().create_data_type( + PrimitiveType::TYPE_BOOLEAN, true), + "col2") + << std::make_tuple( + vectorized::DataTypeFactory::instance().create_data_type( + PrimitiveType::TYPE_VARCHAR, true), + "col3") + << std::make_tuple( + vectorized::DataTypeFactory::instance().create_data_type( + PrimitiveType::TYPE_DATEV2, true), + "col4") + << std::make_tuple( + vectorized::DataTypeFactory::instance().create_data_type( + PrimitiveType::TYPE_DOUBLE, true), + "col5") + << std::make_tuple( + vectorized::DataTypeFactory::instance().create_data_type( + PrimitiveType::TYPE_FLOAT, true), + "col6") + << std::make_tuple( + vectorized::DataTypeFactory::instance().create_data_type( + PrimitiveType::TYPE_INT, true), + "col7") + << std::make_tuple( + vectorized::DataTypeFactory::instance().create_data_type( + PrimitiveType::TYPE_SMALLINT, true), + "col8") + << std::make_tuple( + vectorized::DataTypeFactory::instance().create_data_type( + PrimitiveType::TYPE_VARCHAR, true), + "col9"); + DescriptorTbl* desc_tbl = builder.build(); + auto* tuple_desc = const_cast(desc_tbl->get_tuple_descriptor(0)); + RowDescriptor row_desc(tuple_desc, false); + TFileScanRangeParams params; + params.file_type = TFileType::FILE_LOCAL; + TFileRangeDesc range; + range.path = "./be/test/exec/test_data/orc_scanner/my-file.orc"; + range.start_offset = 0; + range.size = 2024; + + io::IOContext io_ctx; + std::string time_zone = "CST"; + auto reader = OrcReader::create_unique(nullptr, runtime_state.get(), params, range, 100, + time_zone, &io_ctx, true); + auto local_fs = io::global_local_filesystem(); + io::FileReaderSPtr file_reader; + static_cast(reader->set_read_lines_mode({line})); + + static_cast(local_fs->open_file(range.path, &file_reader)); + + std::pair, int> iterator_pair; + iterator_pair = + std::make_pair(std::make_shared( + 
IdManager::ID_VERSION, BackendOptions::get_backend_id(), 10), + tuple_desc->slots().size()); + reader->set_row_id_column_iterator(iterator_pair); + + auto status = reader->init_reader(&column_names, {}, nullptr, {}, false, tuple_desc, &row_desc, + nullptr, nullptr); + + EXPECT_TRUE(status.ok()); + + std::unordered_map> + partition_columns; + std::unordered_map missing_columns; + static_cast(reader->set_fill_columns(partition_columns, missing_columns)); + BlockUPtr block = Block::create_unique(); + for (const auto& slot_desc : tuple_desc->slots()) { + auto data_type = slot_desc->type(); + MutableColumnPtr data_column = data_type->create_column(); + block->insert( + ColumnWithTypeAndName(std::move(data_column), data_type, slot_desc->col_name())); + } + auto data_type = vectorized::DataTypeFactory::instance().create_data_type( + PrimitiveType::TYPE_VARCHAR, false); + block->insert(ColumnWithTypeAndName(data_type->create_column()->assume_mutable(), data_type, + "row_id")); + + bool eof = false; + size_t read_row = 0; + static_cast(reader->get_next_block(block.get(), &read_row, &eof)); + auto row_id_string_column = + static_cast(*block->get_by_name("row_id").column.get()); + for (auto i = 0; i < row_id_string_column.size(); i++) { + GlobalRowLoacationV2 info = + *((GlobalRowLoacationV2*)row_id_string_column.get_data_at(i).data); + EXPECT_EQ(info.file_id, 10); + EXPECT_EQ(info.row_id, line); + EXPECT_EQ(info.backend_id, BackendOptions::get_backend_id()); + EXPECT_EQ(info.version, IdManager::ID_VERSION); + } + block->erase("row_id"); + + std::cout << block->dump_data(); + EXPECT_EQ(block->dump_data(), block_dump); + + range.format_type = TFileFormatType::FORMAT_ORC; + range.__isset.format_type = true; + std::unordered_map colname_to_slot_id; + for (auto slot : tuple_desc->slots()) { + TFileScanSlotInfo slot_info; + slot_info.slot_id = slot->id(); + slot_info.is_file_slot = true; + params.required_slots.emplace_back(slot_info); + } + runtime_state->_timezone = "CST"; + + 
std::unique_ptr runtime_profile; + runtime_profile = std::make_unique("ExternalRowIDFetcher"); + + auto vf = FileScanner::create_unique(runtime_state.get(), runtime_profile.get(), ¶ms, + &colname_to_slot_id, tuple_desc); + EXPECT_TRUE(vf->prepare_for_read_one_line(range).ok()); + ExternalFileMappingInfo external_info(0, range, false); + int64_t init_reader_ms = 0; + int64_t get_block_ms = 0; + auto st = vf->read_one_line_from_range(range, line, block.get(), external_info, &init_reader_ms, + &get_block_ms); + EXPECT_TRUE(st.ok()); + EXPECT_EQ(block->dump_data(1), block_dump); +} + +TEST_F(OrcReadLinesTest, test0) { + std::string block_dump = + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + "|col1(Nullable(Int64))|col2(Nullable(UInt8))|col3(Nullable(String))|col4(Nullable(" + "DateV2))|col5(Nullable(Float64))|col6(Nullable(Float32))|col7(Nullable(Int32))|col8(" + "Nullable(Int16))|col9(Nullable(String))|\n" + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + "| 0| NULL| doris| " + "NULL| 1.567| 1.567| 12345| " + " 1| doris|\n" + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n"; + read_orc_line(0, block_dump); +} +TEST_F(OrcReadLinesTest, test1) { + std::string block_dump = + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + 
"|col1(Nullable(Int64))|col2(Nullable(UInt8))|col3(Nullable(String))|col4(Nullable(" + "DateV2))|col5(Nullable(Float64))|col6(Nullable(Float32))|col7(Nullable(Int32))|col8(" + "Nullable(Int16))|col9(Nullable(String))|\n" + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + "| 1| 1| doris| " + "1900-01-01| 1.567| 1.567| 12345| " + " 1| doris|\n" + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n"; + read_orc_line(1, block_dump); +} + +TEST_F(OrcReadLinesTest, test2) { + std::string block_dump = + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + "|col1(Nullable(Int64))|col2(Nullable(UInt8))|col3(Nullable(String))|col4(Nullable(" + "DateV2))|col5(Nullable(Float64))|col6(Nullable(Float32))|col7(Nullable(Int32))|col8(" + "Nullable(Int16))|col9(Nullable(String))|\n" + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + "| 2| NULL| doris| " + "1900-01-01| 1.567| 1.567| 12345| " + " 1| doris|\n" + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n"; + read_orc_line(2, block_dump); +} + +TEST_F(OrcReadLinesTest, test3) { + std::string block_dump = + 
"+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + "|col1(Nullable(Int64))|col2(Nullable(UInt8))|col3(Nullable(String))|col4(Nullable(" + "DateV2))|col5(Nullable(Float64))|col6(Nullable(Float32))|col7(Nullable(Int32))|col8(" + "Nullable(Int16))|col9(Nullable(String))|\n" + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + "| 3| 1| doris| " + "1900-01-01| 1.567| 1.567| 12345| " + " 1| doris|\n" + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n"; + read_orc_line(3, block_dump); +} +TEST_F(OrcReadLinesTest, test4) { + std::string block_dump = + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + "|col1(Nullable(Int64))|col2(Nullable(UInt8))|col3(Nullable(String))|col4(Nullable(" + "DateV2))|col5(Nullable(Float64))|col6(Nullable(Float32))|col7(Nullable(Int32))|col8(" + "Nullable(Int16))|col9(Nullable(String))|\n" + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + "| 4| NULL| doris| " + "NULL| 1.567| 1.567| 12345| " + " 1| doris|\n" + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + 
"----------+----------------------+\n"; + read_orc_line(4, block_dump); +} +TEST_F(OrcReadLinesTest, test5) { + std::string block_dump = + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + "|col1(Nullable(Int64))|col2(Nullable(UInt8))|col3(Nullable(String))|col4(Nullable(" + "DateV2))|col5(Nullable(Float64))|col6(Nullable(Float32))|col7(Nullable(Int32))|col8(" + "Nullable(Int16))|col9(Nullable(String))|\n" + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + "| 5| 1| doris| " + "1900-01-01| 1.567| 1.567| 12345| " + " 1| doris|\n" + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n"; + read_orc_line(5, block_dump); +} +TEST_F(OrcReadLinesTest, test6) { + std::string block_dump = + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + "|col1(Nullable(Int64))|col2(Nullable(UInt8))|col3(Nullable(String))|col4(Nullable(" + "DateV2))|col5(Nullable(Float64))|col6(Nullable(Float32))|col7(Nullable(Int32))|col8(" + "Nullable(Int16))|col9(Nullable(String))|\n" + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + "| 6| NULL| doris| " + "1900-01-01| 1.567| 1.567| 12345| " + " 1| doris|\n" + 
"+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n"; + read_orc_line(6, block_dump); +} +TEST_F(OrcReadLinesTest, test7) { + std::string block_dump = + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + "|col1(Nullable(Int64))|col2(Nullable(UInt8))|col3(Nullable(String))|col4(Nullable(" + "DateV2))|col5(Nullable(Float64))|col6(Nullable(Float32))|col7(Nullable(Int32))|col8(" + "Nullable(Int16))|col9(Nullable(String))|\n" + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + "| 7| 1| doris| " + "1900-01-01| 1.567| 1.567| 12345| " + " 1| doris|\n" + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n"; + read_orc_line(7, block_dump); +} +TEST_F(OrcReadLinesTest, test8) { + std::string block_dump = + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + "|col1(Nullable(Int64))|col2(Nullable(UInt8))|col3(Nullable(String))|col4(Nullable(" + "DateV2))|col5(Nullable(Float64))|col6(Nullable(Float32))|col7(Nullable(Int32))|col8(" + "Nullable(Int16))|col9(Nullable(String))|\n" + "+---------------------+---------------------+----------------------+------------------" + 
"----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + "| 8| NULL| doris| " + "NULL| 1.567| 1.567| 12345| " + " 1| doris|\n" + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n"; + read_orc_line(8, block_dump); +} +TEST_F(OrcReadLinesTest, test9) { + std::string block_dump = + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + "|col1(Nullable(Int64))|col2(Nullable(UInt8))|col3(Nullable(String))|col4(Nullable(" + "DateV2))|col5(Nullable(Float64))|col6(Nullable(Float32))|col7(Nullable(Int32))|col8(" + "Nullable(Int16))|col9(Nullable(String))|\n" + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n" + "| 9| 1| doris| " + "1900-01-01| 1.567| 1.567| 12345| " + " 1| doris|\n" + "+---------------------+---------------------+----------------------+------------------" + "----+-----------------------+-----------------------+---------------------+-----------" + "----------+----------------------+\n"; + read_orc_line(9, block_dump); +} + +} // namespace vectorized +} // namespace doris diff --git a/be/test/vec/exec/orc_reader_test.cpp b/be/test/vec/exec/orc_reader_test.cpp index 6cb98bb1a009d9..31d9bd25b4051b 100644 --- a/be/test/vec/exec/orc_reader_test.cpp +++ b/be/test/vec/exec/orc_reader_test.cpp @@ -103,7 +103,6 @@ class OrcReaderTest : public testing::Test { }; TEST_F(OrcReaderTest, test_build_search_argument) { - ExecEnv::GetInstance()->set_orc_memory_pool(new ORCMemoryPool()); std::vector 
exprs = { diff --git a/be/test/vec/exec/sort/partition_sorter_test.cpp b/be/test/vec/exec/sort/partition_sorter_test.cpp index 8a27d6e8a62b3f..40ae4248436bdf 100644 --- a/be/test/vec/exec/sort/partition_sorter_test.cpp +++ b/be/test/vec/exec/sort/partition_sorter_test.cpp @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#include "vec/common/sort/partition_sorter.h" + #include #include #include diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run80.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run80.hql new file mode 100644 index 00000000000000..7f374dacfc0128 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run80.hql @@ -0,0 +1,23 @@ +create database if not exists global_lazy_mat_db; +use global_lazy_mat_db; + +CREATE TABLE `orc_topn_lazy_mat_table`( + `id` int, + `name` string, + `value` double, + `active` boolean, + `score` double)PARTITIONED BY( + `file_id` int) STORED AS ORC LOCATION + '/user/doris/preinstalled_data/orc_table/orc_global_lazy_mat_table/'; + +CREATE TABLE `parquet_topn_lazy_mat_table`( + `id` int, + `name` string, + `value` double, + `active` boolean, + `score` double)PARTITIONED BY( + `file_id` int) STORED AS PARQUET LOCATION + '/user/doris/preinstalled_data/parquet_table/parquet_global_lazy_mat_table/'; + +msck repair table orc_topn_lazy_mat_table; +msck repair table parquet_topn_lazy_mat_table; diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_global_lazy_mat_table/file_id=1/example_1.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_global_lazy_mat_table/file_id=1/example_1.orc new file mode 100644 index 00000000000000..42f2d7ada3f025 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_global_lazy_mat_table/file_id=1/example_1.orc differ 
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_global_lazy_mat_table/file_id=2/example_2.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_global_lazy_mat_table/file_id=2/example_2.orc new file mode 100644 index 00000000000000..554317213533c2 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_global_lazy_mat_table/file_id=2/example_2.orc differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_global_lazy_mat_table/file_id=2/example_3.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_global_lazy_mat_table/file_id=2/example_3.orc new file mode 100644 index 00000000000000..721f2b4eaecd60 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_global_lazy_mat_table/file_id=2/example_3.orc differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_global_lazy_mat_table/file_id=1/example_1.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_global_lazy_mat_table/file_id=1/example_1.parquet new file mode 100644 index 00000000000000..b95708f95e5512 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_global_lazy_mat_table/file_id=1/example_1.parquet differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_global_lazy_mat_table/file_id=1/example_3.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_global_lazy_mat_table/file_id=1/example_3.parquet new file mode 100644 index 00000000000000..165f5017e81fe4 Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_global_lazy_mat_table/file_id=1/example_3.parquet differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_global_lazy_mat_table/file_id=2/example_2.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_global_lazy_mat_table/file_id=2/example_2.parquet new file mode 100644 index 00000000000000..a67a15de046e3b Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_global_lazy_mat_table/file_id=2/example_2.parquet differ diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java index 71250e1a96fe52..db2e2e0ca0a3ee 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java @@ -70,6 +70,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.ListIterator; @@ -2620,5 +2621,18 @@ public void setNullableFromNereids(boolean nullable) { public void clearNullableFromNereids() { nullableFromNereids = Optional.empty(); } + + public Set getInputSlotRef() { + Set slots = new HashSet<>(); + if (this instanceof SlotRef) { + slots.add((SlotRef) this); + return slots; + } else { + for (Expr expr : children) { + slots.addAll(expr.getInputSlotRef()); + } + } + return slots; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java index 7078c90f1ed5be..7ac96a9faa9d5b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java @@ -439,7 +439,7 @@ public void setTupleId(TupleId tupleId) { 
this.tupleId = tupleId; } - TupleId getTupleId() { + public TupleId getTupleId() { return tupleId; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java index d15e8280ee4312..5b9c78ac488f78 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java @@ -66,8 +66,8 @@ public class Column implements GsonPostProcessable { public static final String WHERE_SIGN = "__DORIS_WHERE_SIGN__"; public static final String SEQUENCE_COL = "__DORIS_SEQUENCE_COL__"; public static final String ROWID_COL = "__DORIS_ROWID_COL__"; + public static final String GLOBAL_ROWID_COL = "__DORIS_GLOBAL_ROWID_COL__"; public static final String ROW_STORE_COL = "__DORIS_ROW_STORE_COL__"; - public static final String DYNAMIC_COLUMN_NAME = "__DORIS_DYNAMIC_COL__"; public static final String VERSION_COL = "__DORIS_VERSION_COL__"; public static final String SKIP_BITMAP_COL = "__DORIS_SKIP_BITMAP_COL__"; // NOTE: you should name hidden column start with '__DORIS_' !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index f6f0877be25131..7dcb23dd03ac4b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -3157,6 +3157,30 @@ public TFetchOption generateTwoPhaseReadOption(long selectedIndexId) { return fetchOption; } + public void getColumnDesc(long selectedIndexId, List columnsDesc, List keyColumnNames, + List keyColumnTypes, Set materializedColumnNames) { + if (selectedIndexId != -1) { + for (Column col : this.getSchemaByIndexId(selectedIndexId, true)) { + // if (!materializedColumnNames.contains(col.getName())) { + // continue; + // } + TColumn tColumn = col.toThrift(); + col.setIndexFlag(tColumn, this); + if (columnsDesc != null) { + columnsDesc.add(tColumn); + } + if ((Util.showHiddenColumns() || (!Util.showHiddenColumns() && col.isVisible())) && col.isKey()) { + if (keyColumnNames != null) { + keyColumnNames.add(col.getName()); + } + if (keyColumnTypes != null) { + keyColumnTypes.add(col.getDataType().toThrift()); + } + } + } + } + } + public void getColumnDesc(long selectedIndexId, List columnsDesc, List keyColumnNames, List keyColumnTypes) { if (selectedIndexId != -1) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java index 42d308dfefcc93..80bb68295fb71e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java @@ -143,6 +143,17 @@ default List getBaseSchemaOrEmpty() { Column getColumn(String name); + default int getBaseColumnIdxByName(String colName) { + int i = 0; + for (Column col : getBaseSchema()) { + if (col.getName().equalsIgnoreCase(colName)) { + return i; + } + ++i; + } + return -1; + } + String getMysqlType(); String getEngine(); diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java index 99b9e714ae7937..1c2bbceb25f330 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java @@ -251,6 +251,10 @@ private void setColumnPositionMapping() } SlotDescriptor slotDesc = desc.getSlot(slot.getSlotId()); String colName = slotDesc.getColumn().getName(); + if (colName.startsWith(Column.GLOBAL_ROWID_COL)) { + continue; + } + int idx = -1; List columns = getColumns(); for (int i = 0; i < columns.size(); i++) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java index c50b932d5f8c1e..c913a2faf0d1fb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java @@ -118,6 +118,8 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableI private static final Logger LOG = LogManager.getLogger(HMSExternalTable.class); public static final Set SUPPORTED_HIVE_FILE_FORMATS; + public static final Set SUPPORTED_HIVE_TOPN_LAZY_FILE_FORMATS; + public static final Set SUPPORTED_HIVE_TRANSACTIONAL_FILE_FORMATS; public static final Set SUPPORTED_HUDI_FILE_FORMATS; @@ -151,6 +153,10 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableI SUPPORTED_HIVE_TRANSACTIONAL_FILE_FORMATS = Sets.newHashSet(); SUPPORTED_HIVE_TRANSACTIONAL_FILE_FORMATS.add("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"); + + SUPPORTED_HIVE_TOPN_LAZY_FILE_FORMATS = Sets.newHashSet(); + SUPPORTED_HIVE_TOPN_LAZY_FILE_FORMATS.add("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"); + 
SUPPORTED_HIVE_TOPN_LAZY_FILE_FORMATS.add("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"); } static { @@ -304,6 +310,25 @@ private boolean supportedHiveTable() { return true; } + /** + * Only support /orc/orc transactional/parquet table. + */ + public boolean supportedHiveTopNLazyTable() { + if (remoteTable.getSd() == null) { + return false; + } + + if (remoteTable.isSetViewExpandedText() || remoteTable.isSetViewOriginalText()) { + return false; + } + + String inputFileFormat = remoteTable.getSd().getInputFormat(); + if (inputFileFormat == null) { + return false; + } + return SUPPORTED_HIVE_TOPN_LAZY_FILE_FORMATS.contains(inputFileFormat); + } + /** * Get the related remote hive metastore table. */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index c9ed13d4637359..52dffcf30d35e8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -48,6 +48,7 @@ import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.Type; import org.apache.doris.common.Config; +import org.apache.doris.common.Pair; import org.apache.doris.datasource.ExternalTable; import org.apache.doris.datasource.FileQueryScanNode; import org.apache.doris.datasource.es.EsExternalTable; @@ -113,6 +114,7 @@ import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.PreAggStatus; import org.apache.doris.nereids.trees.plans.algebra.Aggregate; +import org.apache.doris.nereids.trees.plans.algebra.CatalogRelation; import org.apache.doris.nereids.trees.plans.physical.AbstractPhysicalJoin; import org.apache.doris.nereids.trees.plans.physical.AbstractPhysicalSort; import org.apache.doris.nereids.trees.plans.physical.PhysicalAssertNumRows; @@ -139,6 +141,8 @@ 
import org.apache.doris.nereids.trees.plans.physical.PhysicalIntersect; import org.apache.doris.nereids.trees.plans.physical.PhysicalJdbcScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalJdbcTableSink; +import org.apache.doris.nereids.trees.plans.physical.PhysicalLazyMaterialize; +import org.apache.doris.nereids.trees.plans.physical.PhysicalLazyMaterializeOlapScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalLimit; import org.apache.doris.nereids.trees.plans.physical.PhysicalNestedLoopJoin; import org.apache.doris.nereids.trees.plans.physical.PhysicalOdbcScan; @@ -159,6 +163,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalUnion; import org.apache.doris.nereids.trees.plans.physical.PhysicalWindow; import org.apache.doris.nereids.trees.plans.physical.RuntimeFilter; +import org.apache.doris.nereids.trees.plans.physical.TopnFilter; import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor; import org.apache.doris.nereids.types.ArrayType; import org.apache.doris.nereids.types.DataType; @@ -186,6 +191,7 @@ import org.apache.doris.planner.IcebergTableSink; import org.apache.doris.planner.IntersectNode; import org.apache.doris.planner.JoinNodeBase; +import org.apache.doris.planner.MaterializationNode; import org.apache.doris.planner.MultiCastDataSink; import org.apache.doris.planner.MultiCastPlanFragment; import org.apache.doris.planner.NestedLoopJoinNode; @@ -801,6 +807,11 @@ public PlanFragment visitPhysicalOdbcScan(PhysicalOdbcScan odbcScan, PlanTransla @Override public PlanFragment visitPhysicalOlapScan(PhysicalOlapScan olapScan, PlanTranslatorContext context) { + return computePhysicalOlapScan(olapScan, false, context); + } + + private PlanFragment computePhysicalOlapScan(PhysicalOlapScan olapScan, + boolean lazyMaterialize, PlanTranslatorContext context) { List slots = olapScan.getOutput(); OlapTable olapTable = olapScan.getTable(); // generate real output tuple @@ -860,15 +871,17 @@ public PlanFragment 
visitPhysicalOlapScan(PhysicalOlapScan olapScan, PlanTransla Utils.execWithUncheckedException(olapScanNode::init); // TODO: process collect scan node in one place context.addScanNode(olapScanNode, olapScan); - // TODO: process translate runtime filter in one place - // use real plan node to present rf apply and rf generator - context.getRuntimeTranslator().ifPresent( - runtimeFilterTranslator -> runtimeFilterTranslator.getContext().getTargetListByScan(olapScan) - .forEach(expr -> runtimeFilterTranslator.translateRuntimeFilterTarget( - expr, olapScanNode, context) - ) - ); - context.getTopnFilterContext().translateTarget(olapScan, olapScanNode, context); + if (!lazyMaterialize) { + // TODO: process translate runtime filter in one place + // use real plan node to present rf apply and rf generator + context.getRuntimeTranslator().ifPresent( + runtimeFilterTranslator -> runtimeFilterTranslator.getContext().getTargetListByScan(olapScan) + .forEach(expr -> runtimeFilterTranslator.translateRuntimeFilterTarget( + expr, olapScanNode, context) + ) + ); + context.getTopnFilterContext().translateTarget(olapScan, olapScanNode, context); + } olapScanNode.setPushDownAggNoGrouping(context.getRelationPushAggOp(olapScan.getRelationId())); // Create PlanFragment // TODO: use a util function to convert distribution to DataPartition @@ -2297,6 +2310,19 @@ public PlanFragment visitPhysicalTopN(PhysicalTopN topN, PlanTra sortNode.setLimit(topN.getLimit()); if (context.getTopnFilterContext().isTopnFilterSource(topN)) { context.getTopnFilterContext().translateSource(topN, sortNode); + TopnFilter filter = context.getTopnFilterContext().getTopnFilter(topN); + List> targets = new ArrayList<>(); + for (Map.Entry entry : filter.legacyTargets.entrySet()) { + Set inputSlots = entry.getValue().getInputSlotRef(); + if (inputSlots.size() != 1) { + LOG.warn("topn filter targets error: " + inputSlots); + } else { + SlotRef slot = inputSlots.iterator().next(); + 
targets.add(Pair.of(entry.getKey().getId().asInt(), + (slot.getDesc().getId().asInt()))); + } + } + sortNode.setTopnFilterTargets(targets); } // push sort to scan opt if (sortNode.getChild(0) instanceof OlapScanNode) { @@ -2522,6 +2548,76 @@ public PlanFragment visitPhysicalWindow(PhysicalWindow physicalW return inputPlanFragment; } + @Override + public PlanFragment visitPhysicalLazyMaterialize(PhysicalLazyMaterialize materialize, + PlanTranslatorContext context) { + PlanFragment inputPlanFragment = materialize.child(0).accept(this, context); + TupleDescriptor materializeTupleDesc = generateTupleDesc(materialize.getOutput(), null, context); + + MaterializationNode materializeNode = new MaterializationNode(context.nextPlanNodeId(), materializeTupleDesc, + inputPlanFragment.getPlanRoot()); + + List rowIds = materialize.getRowIds().stream() + .map(e -> ExpressionTranslator.translate(e, context)) + .collect(Collectors.toList()); + materializeNode.setRowIds(rowIds); + + materializeNode.setLazyColumns(materialize.getLazyColumns()); + materializeNode.setLocations(materialize.getLazySlotLocations()); + materializeNode.setIdxs(materialize.getlazyTableIdxs()); + + List rowStoreFlags = new ArrayList<>(); + for (CatalogRelation relation : materialize.getRelations()) { + rowStoreFlags.add(shouldUseRowStore(relation)); + } + materializeNode.setRowStoreFlags(rowStoreFlags); + + materializeNode.setTopMaterializeNode(context.isTopMaterializeNode()); + if (context.isTopMaterializeNode()) { + context.setTopMaterializeNode(false); + } + + inputPlanFragment.addPlanRoot(materializeNode); + return inputPlanFragment; + } + + private boolean shouldUseRowStore(CatalogRelation rel) { + boolean useRowStore = false; + if (rel instanceof PhysicalOlapScan) { + OlapTable olapTable = ((PhysicalOlapScan) rel).getTable(); + useRowStore = olapTable.storeRowColumn() + && CollectionUtils.isEmpty(olapTable.getTableProperty().getCopiedRowStoreColumns()); + } + return useRowStore; + } + + @Override + 
public PlanFragment visitPhysicalLazyMaterializeOlapScan(PhysicalLazyMaterializeOlapScan lazyScan, + PlanTranslatorContext context) { + PlanFragment planFragment = computePhysicalOlapScan(lazyScan.getScan(), true, context); + TupleDescriptor outputTuple = generateTupleDesc(lazyScan.getOutput(), lazyScan.getScan().getTable(), context); + OlapScanNode olapScanNode = (OlapScanNode) planFragment.getPlanRoot(); + olapScanNode.setDesc(outputTuple); + olapScanNode.setIsTopnLazyMaterialize(true); + olapScanNode.setGlobalRowIdColumn(lazyScan.getRowId().getOriginalColumn().get()); + for (Slot slot : lazyScan.getOutput()) { + if (((SlotReference) slot).getOriginalColumn().isPresent()) { + olapScanNode.addTopnLazyMaterializeOutputColumns(((SlotReference) slot).getOriginalColumn().get()); + } + } + planFragment.getPlanRoot().resetTupleIds(Lists.newArrayList(outputTuple.getId())); + // translate rf on outputTuple + context.getRuntimeTranslator().ifPresent( + runtimeFilterTranslator -> runtimeFilterTranslator.getContext().getTargetListByScan(lazyScan) + .forEach(expr -> runtimeFilterTranslator.translateRuntimeFilterTarget( + expr, olapScanNode, context) + ) + ); + context.getTopnFilterContext().translateTarget(lazyScan, olapScanNode, context); + + return planFragment; + } + /* ******************************************************************************************** * private functions * ******************************************************************************************** */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java index de7fa61422bde5..bbc95f08b04876 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java @@ -116,6 +116,8 @@ public class PlanTranslatorContext { 
private final Map> statsUnknownColumnsMap = Maps.newHashMap(); + private boolean isTopMaterializeNode = true; + public PlanTranslatorContext(CascadesContext ctx) { this.connectContext = ctx.getConnectContext(); this.translator = new RuntimeFilterTranslator(ctx.getRuntimeFilterContext()); @@ -343,4 +345,13 @@ public void setRelationPushAggOp(RelationId relationId, TPushAggOp aggOp) { public TPushAggOp getRelationPushAggOp(RelationId relationId) { return tablePushAggOp.getOrDefault(relationId, TPushAggOp.NONE); } + + public boolean isTopMaterializeNode() { + return isTopMaterializeNode; + } + + public void setTopMaterializeNode(boolean topMaterializeNode) { + isTopMaterializeNode = topMaterializeNode; + } + } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PlanPostProcessors.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PlanPostProcessors.java index a8654e27291c06..daaf060e70876c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PlanPostProcessors.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PlanPostProcessors.java @@ -18,6 +18,7 @@ package org.apache.doris.nereids.processor.post; import org.apache.doris.nereids.CascadesContext; +import org.apache.doris.nereids.processor.post.materialize.LazyMaterializeTopN; import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan; import org.apache.doris.qe.ConnectContext; import org.apache.doris.thrift.TRuntimeFilterMode; @@ -60,6 +61,10 @@ public List getProcessors() { Builder builder = ImmutableList.builder(); builder.add(new PushDownFilterThroughProject()); builder.add(new RemoveUselessProjectPostProcessor()); + if (cascadesContext.getConnectContext().getSessionVariable().enableTopnLazyMaterialization) { + // LazyMaterializeTopN should run before MergeProjectPostProcessor + builder.add(new LazyMaterializeTopN()); + } builder.add(new MergeProjectPostProcessor()); builder.add(new 
RecomputeLogicalPropertiesProcessor()); if (cascadesContext.getConnectContext().getSessionVariable().enableAggregateCse) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterGenerator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterGenerator.java index dbbf34c5eb6e85..595e3717b5b34d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterGenerator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterGenerator.java @@ -46,6 +46,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalDistribute; import org.apache.doris.nereids.trees.plans.physical.PhysicalFilter; import org.apache.doris.nereids.trees.plans.physical.PhysicalHashJoin; +import org.apache.doris.nereids.trees.plans.physical.PhysicalLazyMaterializeOlapScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalNestedLoopJoin; import org.apache.doris.nereids.trees.plans.physical.PhysicalOneRowRelation; import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan; @@ -502,6 +503,13 @@ public PhysicalRelation visitPhysicalRelation(PhysicalRelation relation, Cascade return relation; } + @Override + public Plan visitPhysicalLazyMaterializeOlapScan(PhysicalLazyMaterializeOlapScan scan, CascadesContext context) { + RuntimeFilterContext ctx = context.getRuntimeFilterContext(); + scan.getOutput().forEach(slot -> ctx.aliasTransferMapPut(slot, Pair.of(scan, slot))); + return scan; + } + @Override public PhysicalSetOperation visitPhysicalSetOperation(PhysicalSetOperation setOperation, CascadesContext context) { setOperation.children().forEach(child -> child.accept(this, context)); @@ -684,6 +692,11 @@ private void getAllJoinInfo(PhysicalPlan root, Set joins) { public static void getAllScanInfo(Plan root, Set scans) { if (root instanceof PhysicalRelation) { scans.add((PhysicalRelation) root); + // if (root instanceof PhysicalLazyMaterializeOlapScan) { 
+ // scans.add(((PhysicalLazyMaterializeOlapScan) root).getScan()); + // } else { + // scans.add((PhysicalRelation) root); + // } } else { for (Plan child : root.children()) { getAllScanInfo(child, scans); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterPushDownVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterPushDownVisitor.java index bf83f4d2afe44c..adbb5e2c863532 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterPushDownVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterPushDownVisitor.java @@ -34,6 +34,7 @@ import org.apache.doris.nereids.trees.plans.physical.AbstractPhysicalJoin; import org.apache.doris.nereids.trees.plans.physical.AbstractPhysicalPlan; import org.apache.doris.nereids.trees.plans.physical.PhysicalHashJoin; +import org.apache.doris.nereids.trees.plans.physical.PhysicalLazyMaterializeOlapScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalNestedLoopJoin; import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; import org.apache.doris.nereids.trees.plans.physical.PhysicalRelation; @@ -189,6 +190,11 @@ public Boolean visit(Plan plan, PushDownContext ctx) { return pushed; } + @Override + public Boolean visitPhysicalLazyMaterializeOlapScan(PhysicalLazyMaterializeOlapScan scan, PushDownContext ctx) { + return visitPhysicalRelation(scan, ctx); + } + @Override public Boolean visitPhysicalRelation(PhysicalRelation scan, PushDownContext ctx) { if (scan instanceof PhysicalSchemaScan) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopnFilterContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopnFilterContext.java index 5df829e57966ec..4fa902203a77d2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopnFilterContext.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopnFilterContext.java @@ -57,6 +57,10 @@ public boolean isTopnFilterSource(TopN topn) { return filters.containsKey(topn.getObjectId()); } + public TopnFilter getTopnFilter(TopN topn) { + return filters.get(topn.getObjectId()); + } + public List getTopnFilters() { return Lists.newArrayList(filters.values()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopnFilterPushDownVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopnFilterPushDownVisitor.java index 62fc912b14bd5b..101b29d91f937b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopnFilterPushDownVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopnFilterPushDownVisitor.java @@ -32,6 +32,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalFileScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalHashJoin; import org.apache.doris.nereids.trees.plans.physical.PhysicalJdbcScan; +import org.apache.doris.nereids.trees.plans.physical.PhysicalLazyMaterializeOlapScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalNestedLoopJoin; import org.apache.doris.nereids.trees.plans.physical.PhysicalOdbcScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalOlapScan; @@ -231,12 +232,18 @@ public Boolean visitPhysicalRelation(PhysicalRelation relation, PushDownContext return false; } + @Override + public Boolean visitPhysicalLazyMaterializeOlapScan(PhysicalLazyMaterializeOlapScan lazyScan, PushDownContext ctx) { + return visitPhysicalRelation(lazyScan, ctx); + } + private boolean supportPhysicalRelations(PhysicalRelation relation) { return relation instanceof PhysicalOlapScan || relation instanceof PhysicalOdbcScan || relation instanceof PhysicalEsScan || relation instanceof PhysicalFileScan || relation instanceof PhysicalJdbcScan - || relation instanceof 
PhysicalDeferMaterializeOlapScan; + || relation instanceof PhysicalDeferMaterializeOlapScan + || relation instanceof PhysicalLazyMaterializeOlapScan; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/LazyMaterializeTopN.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/LazyMaterializeTopN.java new file mode 100644 index 00000000000000..30547b63ffd707 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/LazyMaterializeTopN.java @@ -0,0 +1,204 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.processor.post.materialize; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Type; +import org.apache.doris.nereids.CascadesContext; +import org.apache.doris.nereids.processor.post.PlanPostProcessor; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.plans.AbstractPlan; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.algebra.CatalogRelation; +import org.apache.doris.nereids.trees.plans.physical.PhysicalCatalogRelation; +import org.apache.doris.nereids.trees.plans.physical.PhysicalLazyMaterialize; +import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; +import org.apache.doris.nereids.trees.plans.physical.PhysicalTopN; + +import com.google.common.collect.BiMap; +import com.google.common.collect.HashBiMap; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; + +/** + * post rule to do lazy materialize + */ +public class LazyMaterializeTopN extends PlanPostProcessor { + /* BE do not support pattern: + union + -->materialize + -->topn + -->scan1 + -->materialize + -->topn + -->scan2 + when we create materializeNode for the first union child, set hasMaterialized=true + to avoid generating materializeNode for other union's children + */ + private boolean hasMaterialized = false; + + @Override + public Plan visitPhysicalTopN(PhysicalTopN topN, CascadesContext ctx) { + if (hasMaterialized) { + return topN; + } + /* + topn(output=[x] orderkey=[b]) + ->project(a as x) + ->T(a, b) + 'x' can be lazy materialized. 
+ materializeMap: x->(T, a) + */ + Map materializeMap = new HashMap<>(); + List materializedSlots = new ArrayList<>(); + // find the slots which can be lazy materialized + for (Slot slot : topN.getOutput()) { + Optional source = computeMaterializeSource(topN, (SlotReference) slot); + if (source.isPresent()) { + SlotReference baseSlot = source.get().baseSlot; + if (source.get().baseSlot.hasSubColPath()) { + slot = baseSlot.withExprId(slot.getExprId()); + } + materializeMap.put(slot, source.get()); + } else { + materializedSlots.add(slot); + } + } + // find out the slots which are worth doing lazy materialization + List lazyMaterializeSlots = filterSlotsForLazyMaterialization(materializeMap); + if (lazyMaterializeSlots.isEmpty()) { + return topN; + } + + Map> relationToLazySlotMap = new HashMap<>(); + for (Slot slot : lazyMaterializeSlots) { + MaterializeSource source = materializeMap.get(slot); + relationToLazySlotMap.computeIfAbsent(source.relation, relation -> new ArrayList<>()).add(slot); + } + + Plan result = topN; + List originOutput = topN.getOutput(); + BiMap relationToRowId = HashBiMap.create(relationToLazySlotMap.size()); + HashSet rowIdSet = new HashSet<>(); + for (CatalogRelation relation : relationToLazySlotMap.keySet()) { + Column rowIdCol = new Column(Column.GLOBAL_ROWID_COL + relation.getTable().getName(), + Type.STRING, false, null, false, + "", relation.getTable().getName() + ".global_row_id"); + SlotReference rowIdSlot = SlotReference.fromColumn(relation.getTable(), rowIdCol, + relation.getQualifier()); + result = result.accept(new LazySlotPruning(), + new LazySlotPruning.Context((PhysicalCatalogRelation) relation, + rowIdSlot, relationToLazySlotMap.get(relation))); + relationToRowId.put(relation, rowIdSlot); + rowIdSet.add(rowIdSlot); + } + + // materialize.child.output requires + // rowId only appears once. 
+ // that is [a, rowId1, b rowId1] is not acceptable + List materializeInput = moveRowIdsToTail(result.getOutput(), rowIdSet); + + if (materializeInput == null) { + /* + topn + -->any + => + project + -->materialize + -->topn + -->any + */ + result = new PhysicalLazyMaterialize(result, result.getOutput(), + materializedSlots, relationToLazySlotMap, relationToRowId, materializeMap, + null, ((AbstractPlan) result).getStats()); + hasMaterialized = true; + } else { + /* + topn + -->any + => + project + -->materialize + -->project + -->topn + -->any + */ + List reOrderedMaterializedSlots = new ArrayList<>(); + for (Slot slot : materializeInput) { + if (rowIdSet.contains(slot)) { + break; + } + reOrderedMaterializedSlots.add(slot); + } + result = new PhysicalProject(materializeInput, null, result); + result = new PhysicalLazyMaterialize(result, materializeInput, + reOrderedMaterializedSlots, relationToLazySlotMap, relationToRowId, materializeMap, + null, ((AbstractPlan) result).getStats()); + hasMaterialized = true; + } + result = new PhysicalProject(originOutput, null, result); + return result; + } + + /* + [a, r1, r2, b, r2] => [a, b, r1, r2] + move all rowIds to tail, and remove duplicated rowIds + */ + private List moveRowIdsToTail(List slots, Set rowIds) { + List reArrangedSlots = new ArrayList<>(); + List reArrangedRowIds = new ArrayList<>(); + boolean moved = false; + boolean meetRowId = false; + for (Slot slot : slots) { + if (rowIds.contains(slot)) { + if (!reArrangedRowIds.contains(slot)) { + reArrangedRowIds.add((SlotReference) slot); + } + meetRowId = true; + } else { + if (meetRowId) { + moved = true; + } + reArrangedSlots.add((SlotReference) slot); + } + } + if (!moved) { + return null; + } + reArrangedSlots.addAll(reArrangedRowIds); + return reArrangedSlots; + } + + private List filterSlotsForLazyMaterialization(Map materializeMap) { + return new ArrayList<>(materializeMap.keySet()); + } + + private Optional computeMaterializeSource(PhysicalTopN topN, 
SlotReference slot) { + MaterializeProbeVisitor probe = new MaterializeProbeVisitor(); + MaterializeProbeVisitor.ProbeContext context = new MaterializeProbeVisitor.ProbeContext(slot); + return probe.visit(topN, context); + } + +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/LazySlotPruning.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/LazySlotPruning.java new file mode 100644 index 00000000000000..f9858538d084ce --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/LazySlotPruning.java @@ -0,0 +1,245 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.processor.post.materialize; + +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.physical.AbstractPhysicalJoin; +import org.apache.doris.nereids.trees.plans.physical.AbstractPhysicalPlan; +import org.apache.doris.nereids.trees.plans.physical.PhysicalCTEConsumer; +import org.apache.doris.nereids.trees.plans.physical.PhysicalCTEProducer; +import org.apache.doris.nereids.trees.plans.physical.PhysicalCatalogRelation; +import org.apache.doris.nereids.trees.plans.physical.PhysicalFileScan; +import org.apache.doris.nereids.trees.plans.physical.PhysicalHashAggregate; +import org.apache.doris.nereids.trees.plans.physical.PhysicalLazyMaterializeFileScan; +import org.apache.doris.nereids.trees.plans.physical.PhysicalLazyMaterializeOlapScan; +import org.apache.doris.nereids.trees.plans.physical.PhysicalOlapScan; +import org.apache.doris.nereids.trees.plans.physical.PhysicalOneRowRelation; +import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; +import org.apache.doris.nereids.trees.plans.physical.PhysicalRepeat; +import org.apache.doris.nereids.trees.plans.physical.PhysicalSetOperation; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; + +import com.google.common.collect.ImmutableList; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * prune lazy materialized slot + */ +public class LazySlotPruning extends DefaultPlanRewriter { + /** + * Context + */ + public static class Context { + private PhysicalCatalogRelation scan; + private List lazySlots; + private SlotReference rowIdSlot; + + public Context(PhysicalCatalogRelation scan, SlotReference 
rowIdSlot, List lazySlots) { + this.scan = scan; + this.lazySlots = lazySlots; + this.rowIdSlot = rowIdSlot; + } + + private Context(PhysicalCatalogRelation scan, List lazySlots, SlotReference rowIdSlot) { + this.scan = scan; + this.lazySlots = lazySlots; + this.rowIdSlot = rowIdSlot; + } + + public Context withLazySlots(List otherLazySlots) { + return new Context(this.scan, otherLazySlots, this.rowIdSlot); + } + + public void forceRowIdNullable() { + rowIdSlot = rowIdSlot.withNullable(true); + } + + public void updateRowIdSlot(SlotReference rowIdSlot) { + this.rowIdSlot = rowIdSlot; + } + } + + @Override + public Plan visit(Plan plan, Context context) { + ImmutableList.Builder newChildren = ImmutableList.builderWithExpectedSize(plan.arity()); + boolean hasNewChildren = false; + for (Plan child : plan.children()) { + if (child.getOutput().containsAll(context.lazySlots)) { + Plan newChild = child.accept(this, context); + if (newChild != child) { + hasNewChildren = true; + } + newChildren.add(newChild); + } else { + newChildren.add(child); + } + } + + if (hasNewChildren) { + AbstractPhysicalPlan physicalPlan = (AbstractPhysicalPlan) plan; + plan = ((AbstractPhysicalPlan) plan.withChildren(newChildren.build())) + .copyStatsAndGroupIdFrom(physicalPlan).resetLogicalProperties(); + } + return plan; + } + + @Override + public Plan visitPhysicalOlapScan(PhysicalOlapScan scan, Context context) { + if (scan.getOutput().containsAll(context.lazySlots)) { + PhysicalLazyMaterializeOlapScan lazyScan = new PhysicalLazyMaterializeOlapScan(scan, + context.rowIdSlot, context.lazySlots); + return lazyScan; + } else { + // should not hit here + throw new RuntimeException("Lazy materialize fault"); + } + } + + @Override + public Plan visitPhysicalFileScan(PhysicalFileScan scan, Context context) { + if (scan.getOutput().containsAll(context.lazySlots)) { + PhysicalLazyMaterializeFileScan lazyScan = new PhysicalLazyMaterializeFileScan(scan, + context.rowIdSlot, context.lazySlots); + return 
lazyScan; + } else { + // should not hit here + throw new RuntimeException("Lazy materialize fault"); + } + } + + @Override + public Plan visitPhysicalLazyMaterializeOlapScan(PhysicalLazyMaterializeOlapScan scan, Context context) { + // should not come here + return scan; + } + + // stop pruning when meet OutputPrunable plan node + @Override + public Plan visitPhysicalHashAggregate(PhysicalHashAggregate aggregate, Context context) { + return aggregate; + } + + @Override + public Plan visitPhysicalCTEConsumer(PhysicalCTEConsumer cteConsumer, Context context) { + return cteConsumer; + } + + @Override + public Plan visitPhysicalRepeat(PhysicalRepeat repeat, Context context) { + return repeat; + } + + @Override + public Plan visitPhysicalSetOperation(PhysicalSetOperation setOperation, Context context) { + return setOperation; + } + + @Override + public Plan visitPhysicalCTEProducer(PhysicalCTEProducer producer, Context context) { + return producer; + } + + @Override + public Plan visitPhysicalOneRowRelation(PhysicalOneRowRelation oneRowRelation, Context context) { + return oneRowRelation; + } + + @Override + public Plan visitPhysicalProject(PhysicalProject project, Context context) { + // project A as B + // singleSlotAliasMap: B->A + Map singleSlotAliasMap = new HashMap<>(); + for (NamedExpression ne : project.getProjects()) { + if (ne instanceof Alias && ne.child(0) instanceof Slot) { + singleSlotAliasMap.put(ne.toSlot(), (Slot) ne.child(0)); + } + } + + Plan child = project.child(); + if (singleSlotAliasMap.isEmpty()) { + child = child.accept(this, context); + } else { + List childLazySlots = new ArrayList<>(); + for (Slot slot : context.lazySlots) { + if (singleSlotAliasMap.containsKey(slot)) { + childLazySlots.add(singleSlotAliasMap.get(slot)); + } else { + childLazySlots.add(slot); + } + } + Context childContext = context.withLazySlots(childLazySlots); + child = child.accept(this, childContext); + } + if (child.getOutput().contains(context.rowIdSlot)) { + List 
newProjections = new ArrayList<>(); + for (NamedExpression ne : project.getProjects()) { + if (!context.lazySlots.contains(ne.toSlot())) { + newProjections.add(ne); + } + } + newProjections.add(context.rowIdSlot); + project = project.withProjectionsAndChild(newProjections, child).resetLogicalProperties(); + } + return project; + } + + @Override + public Plan visitAbstractPhysicalJoin(AbstractPhysicalJoin join, Context context) { + ImmutableList.Builder newChildren = ImmutableList.builderWithExpectedSize(2); + boolean hasNewChildren = false; + for (int i = 0; i < 2; i++) { + Plan child = join.child(i); + if (child.getOutputSet().containsAll(context.lazySlots)) { + Plan newChild = child.accept(this, context); + if (newChild != child) { + hasNewChildren = true; + } + newChildren.add(newChild); + } else { + newChildren.add(child); + } + } + Plan plan = join; + if (hasNewChildren) { + AbstractPhysicalPlan physicalPlan = (AbstractPhysicalPlan) plan; + plan = ((AbstractPhysicalPlan) plan.withChildren(newChildren.build())) + .copyStatsAndGroupIdFrom(physicalPlan).resetLogicalProperties(); + // update rowIdSlot.nullable after outer join + int rowIdPos = plan.getOutput().indexOf(context.rowIdSlot); + if (rowIdPos != -1 && !context.rowIdSlot.nullable()) { + SlotReference rowIdSlot = (SlotReference) plan.getOutput().get(rowIdPos); + if (join.getJoinType().isFullOuterJoin()) { + context.updateRowIdSlot(rowIdSlot.withNullable(true)); + } else if (join.getJoinType().isLeftOuterJoin() && plan.child(1).getOutput().contains(rowIdSlot)) { + context.updateRowIdSlot(rowIdSlot.withNullable(true)); + } else if (join.getJoinType().isRightOuterJoin() && plan.child(0).getOutput().contains(rowIdSlot)) { + context.updateRowIdSlot(rowIdSlot.withNullable(true)); + } + } + } + return plan; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/MaterializeProbeVisitor.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/MaterializeProbeVisitor.java new file mode 100644 index 00000000000000..e42a0b4c1e1b8f --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/MaterializeProbeVisitor.java @@ -0,0 +1,166 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.processor.post.materialize; + +import org.apache.doris.catalog.HiveTable; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.datasource.hive.HMSExternalTable; +import org.apache.doris.datasource.hive.HMSExternalTable.DLAType; +import org.apache.doris.datasource.iceberg.IcebergExternalTable; +import org.apache.doris.nereids.processor.post.materialize.MaterializeProbeVisitor.ProbeContext; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.physical.PhysicalCatalogRelation; +import org.apache.doris.nereids.trees.plans.physical.PhysicalLazyMaterialize; +import org.apache.doris.nereids.trees.plans.physical.PhysicalOlapScan; +import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; +import org.apache.doris.nereids.trees.plans.physical.PhysicalSetOperation; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor; + +import com.google.common.collect.ImmutableSet; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.Optional; +import java.util.Set; + +/** + * visitor to probe the slots which can perform lazy materialization + */ +public class MaterializeProbeVisitor extends DefaultPlanVisitor, ProbeContext> { + protected static final Logger LOG = LogManager.getLogger(MaterializeProbeVisitor.class); + + private static Set SUPPORT_RELATION_TYPES = ImmutableSet.of( + OlapTable.class, + HiveTable.class, + IcebergExternalTable.class, + HMSExternalTable.class + ); + + /** + * context + */ + public static class ProbeContext { + public SlotReference slot; + + /** + * constructor + */ + public ProbeContext(SlotReference slot) { + this.slot = slot; + } + } + + @Override + public Optional visit(Plan plan, ProbeContext 
context) { + if (plan.getInputSlots().contains(context.slot)) { + return Optional.empty(); + } + + Plan next = null; + for (Plan child : plan.children()) { + if (child.getOutput().contains(context.slot)) { + next = child; + break; + } + } + if (next == null) { + return Optional.empty(); + } else { + return next.accept(this, context); + } + } + + boolean checkRelationTableSupportedType(PhysicalCatalogRelation relation) { + if (!SUPPORT_RELATION_TYPES.contains(relation.getTable().getClass())) { + return false; + } + + if (relation.getTable() instanceof HMSExternalTable) { + HMSExternalTable hmsExternalTable = (HMSExternalTable) relation.getTable(); + return (hmsExternalTable.getDlaType() == DLAType.HIVE && hmsExternalTable.supportedHiveTopNLazyTable()) + || hmsExternalTable.getDlaType() == DLAType.ICEBERG; + } + return true; + } + + @Override + public Optional visitPhysicalOlapScan(PhysicalOlapScan scan, ProbeContext context) { + if (scan.getSelectedIndexId() == scan.getTable().getBaseIndexId()) { + return visitPhysicalCatalogRelation(scan, context); + } + return Optional.empty(); + } + + @Override + public Optional visitPhysicalCatalogRelation( + PhysicalCatalogRelation relation, ProbeContext context) { + if (checkRelationTableSupportedType(relation) + && relation.getOutput().contains(context.slot) + && !relation.getOperativeSlots().contains(context.slot)) { + // lazy materialize slot must be a passive slot + if (context.slot.getOriginalColumn().isPresent()) { + return Optional.of(new MaterializeSource(relation, context.slot)); + } else { + LOG.info("lazy materialize {} failed, because its column is empty", context.slot); + } + } + return Optional.empty(); + } + + @Override + public Optional visitPhysicalLazyMaterialize( + PhysicalLazyMaterialize materialize, ProbeContext context) { + return materialize.child().accept(this, context); + } + + @Override + public Optional visitPhysicalSetOperation( + PhysicalSetOperation setOperation, ProbeContext context) { + /* + 
union_all could support lazy materialization, but there are efficiency issues in BE. + And hence, all set operation are not support lazy materialization. + */ + return Optional.empty(); + } + + @Override + public Optional visitPhysicalProject( + PhysicalProject project, ProbeContext context) { + int idx = project.getOutput().indexOf(context.slot); + if (idx < 0) { + return Optional.empty(); + } + NamedExpression projectExpr = project.getProjects().get(idx); + if (projectExpr instanceof SlotReference) { + return project.child().accept(this, context); + } else { + // projectExpr is alias + Alias alias = (Alias) projectExpr; + if (alias.child() instanceof SlotReference) { + ProbeContext childContext = new ProbeContext((SlotReference) alias.child()); + return project.child().accept(this, childContext); + } else { + return Optional.empty(); + } + } + } + +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/MaterializeSource.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/MaterializeSource.java new file mode 100644 index 00000000000000..d367dd7957d323 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/MaterializeSource.java @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.processor.post.materialize; + +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.plans.algebra.CatalogRelation; + +/** + the table and slot used to do lazy materialize + */ +public class MaterializeSource { + public final CatalogRelation relation; + public final SlotReference baseSlot; + + /* + constructor + */ + public MaterializeSource(CatalogRelation relation, SlotReference baseSlot) { + this.relation = relation; + this.baseSlot = baseSlot; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java index 27569c1fef95b5..e075972c792b6c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java @@ -412,14 +412,14 @@ private LogicalPlan getLogicalPlan(TableIf table, UnboundRelation unboundRelatio if (hmsTable.getDlaType() == DLAType.HUDI) { LogicalHudiScan hudiScan = new LogicalHudiScan(unboundRelation.getRelationId(), hmsTable, qualifierWithoutTableName, unboundRelation.getTableSample(), - unboundRelation.getTableSnapshot()); + unboundRelation.getTableSnapshot(), ImmutableList.of()); hudiScan = hudiScan.withScanParams(hmsTable, unboundRelation.getScanParams()); return hudiScan; } else { return new LogicalFileScan(unboundRelation.getRelationId(), (HMSExternalTable) table, qualifierWithoutTableName, unboundRelation.getTableSample(), - unboundRelation.getTableSnapshot()); + unboundRelation.getTableSnapshot(), ImmutableList.of()); } case ICEBERG_EXTERNAL_TABLE: case PAIMON_EXTERNAL_TABLE: @@ -428,7 +428,7 @@ private LogicalPlan getLogicalPlan(TableIf table, UnboundRelation unboundRelatio case LAKESOUl_EXTERNAL_TABLE: return 
new LogicalFileScan(unboundRelation.getRelationId(), (ExternalTable) table, qualifierWithoutTableName, unboundRelation.getTableSample(), - unboundRelation.getTableSnapshot()); + unboundRelation.getTableSnapshot(), ImmutableList.of()); case SCHEMA: // schema table's name is case-insensitive, we need save its name in SQL text to get correct case. return new LogicalSubQueryAlias<>(qualifiedTableName, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalFileScanToPhysicalFileScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalFileScanToPhysicalFileScan.java index 70ab9b1d502c0d..4359e6675de808 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalFileScanToPhysicalFileScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalFileScanToPhysicalFileScan.java @@ -41,7 +41,8 @@ public Rule build() { fileScan.getLogicalProperties(), fileScan.getSelectedPartitions(), fileScan.getTableSample(), - fileScan.getTableSnapshot()) + fileScan.getTableSnapshot(), + fileScan.getOperativeSlots()) ).toRule(RuleType.LOGICAL_FILE_SCAN_TO_PHYSICAL_FILE_SCAN_RULE); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalHudiScanToPhysicalHudiScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalHudiScanToPhysicalHudiScan.java index d8beef8d042ed9..97ed60c6078167 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalHudiScanToPhysicalHudiScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalHudiScanToPhysicalHudiScan.java @@ -42,7 +42,8 @@ public Rule build() { fileScan.getTableSample(), fileScan.getTableSnapshot(), fileScan.getScanParams(), - fileScan.getIncrementalRelation()) + fileScan.getIncrementalRelation(), + fileScan.getOperativeSlots()) 
).toRule(RuleType.LOGICAL_HUDI_SCAN_TO_PHYSICAL_HUDI_SCAN_RULE); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DeferMaterializeTopNResult.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DeferMaterializeTopNResult.java index 765081ae016afe..07caf1b295297d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DeferMaterializeTopNResult.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DeferMaterializeTopNResult.java @@ -259,6 +259,9 @@ public List buildRules() { private Plan deferMaterialize(LogicalResultSink logicalResultSink, LogicalTopN logicalTopN, Optional> logicalProject, Optional> logicalFilter, LogicalOlapScan logicalOlapScan) { + if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().enableTopnLazyMaterialization) { + return null; + } Column rowId = new Column(Column.ROWID_COL, Type.STRING, false, null, false, "", "rowid column"); SlotReference columnId = SlotReference.fromColumn( logicalOlapScan.getTable(), rowId, logicalOlapScan.getQualifier()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/OperativeColumnDerive.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/OperativeColumnDerive.java index 74a97fc2c64826..7af4c2428c1182 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/OperativeColumnDerive.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/OperativeColumnDerive.java @@ -17,6 +17,8 @@ package org.apache.doris.nereids.rules.rewrite; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.OlapTable; import org.apache.doris.nereids.jobs.JobContext; import org.apache.doris.nereids.rules.rewrite.OperativeColumnDerive.DeriveContext; import org.apache.doris.nereids.trees.expressions.NamedExpression; @@ -24,6 +26,7 @@ import org.apache.doris.nereids.trees.expressions.SlotReference; import 
org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalCatalogRelation; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.trees.plans.logical.LogicalSink; import org.apache.doris.nereids.trees.plans.logical.LogicalUnion; @@ -118,6 +121,25 @@ public Plan visitLogicalProject(LogicalProject project, DeriveCo return plan; } + @Override + public Plan visitLogicalOlapScan(LogicalOlapScan olapScan, DeriveContext context) { + Set intersectSlots = new HashSet<>(olapScan.getOutput()); + intersectSlots.retainAll(context.operativeSlots); + OlapTable table = olapScan.getTable(); + if (KeysType.UNIQUE_KEYS.equals(table.getKeysType()) + && !table.getTableProperty().getEnableUniqueKeyMergeOnWrite() + || KeysType.AGG_KEYS.equals(table.getKeysType()) + || KeysType.PRIMARY_KEYS.equals(table.getKeysType())) { + for (Slot slot : olapScan.getOutput()) { + SlotReference slotReference = (SlotReference) slot; + if (slotReference.getOriginalColumn().isPresent() && slotReference.getOriginalColumn().get().isKey()) { + intersectSlots.add(slotReference); + } + } + } + return (Plan) olapScan.withOperativeSlots(intersectSlots); + } + @Override public Plan visitLogicalCatalogRelation(LogicalCatalogRelation relation, DeriveContext context) { Set intersectSlots = new HashSet<>(relation.getOutput()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java index 6679e5086075cd..cd31fbe9834548 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java @@ -136,6 +136,7 @@ public enum PlanType { PHYSICAL_TOP_N, PHYSICAL_UNION, PHYSICAL_WINDOW, + PHYSICAL_MATERIALIZE, // commands ADMIN_CHECK_TABLETS_COMMAND, diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/CatalogRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/CatalogRelation.java index 04e963459062d4..e73e54d8f41a12 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/CatalogRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/CatalogRelation.java @@ -34,6 +34,8 @@ public interface CatalogRelation extends Relation { DatabaseIf getDatabase() throws AnalysisException; + List getQualifier(); + default CatalogRelation withOperativeSlots(Collection operativeSlots) { return this; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCatalogRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCatalogRelation.java index f9c7438c20ca23..3d1ddf9a26a8b2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCatalogRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCatalogRelation.java @@ -114,6 +114,7 @@ public List computeOutput() { .collect(ImmutableList.toImmutableList()); } + @Override public List getQualifier() { return qualifier; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java index c64d1bb73f1050..73571ea38787eb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java @@ -24,6 +24,7 @@ import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.trees.TableSample; +import org.apache.doris.nereids.trees.expressions.Slot; import 
org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.PlanType; import org.apache.doris.nereids.trees.plans.RelationId; @@ -33,6 +34,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Objects; @@ -52,18 +54,19 @@ public class LogicalFileScan extends LogicalCatalogRelation { protected LogicalFileScan(RelationId id, ExternalTable table, List qualifier, Optional groupExpression, Optional logicalProperties, SelectedPartitions selectedPartitions, Optional tableSample, - Optional tableSnapshot) { - super(id, PlanType.LOGICAL_FILE_SCAN, table, qualifier, groupExpression, logicalProperties); + Optional tableSnapshot, Collection operativeSlots) { + super(id, PlanType.LOGICAL_FILE_SCAN, table, qualifier, groupExpression, logicalProperties, operativeSlots); this.selectedPartitions = selectedPartitions; this.tableSample = tableSample; this.tableSnapshot = tableSnapshot; } public LogicalFileScan(RelationId id, ExternalTable table, List qualifier, - Optional tableSample, Optional tableSnapshot) { + Optional tableSample, Optional tableSnapshot, Collection operativeSlots) { this(id, table, qualifier, Optional.empty(), Optional.empty(), table.initSelectedPartitions(MvccUtil.getSnapshotFromContext(table)), - tableSample, tableSnapshot); + tableSample, tableSnapshot, + operativeSlots); } public SelectedPartitions getSelectedPartitions() { @@ -89,32 +92,37 @@ public ExternalTable getTable() { public String toString() { return Utils.toSqlString("LogicalFileScan", "qualified", qualifiedName(), - "output", getOutput() + "output", getOutput(), + "operativeCols", operativeSlots ); } @Override public LogicalFileScan withGroupExpression(Optional groupExpression) { return new LogicalFileScan(relationId, (ExternalTable) table, qualifier, groupExpression, - Optional.of(getLogicalProperties()), selectedPartitions, tableSample, 
tableSnapshot); + Optional.of(getLogicalProperties()), selectedPartitions, tableSample, tableSnapshot, + operativeSlots); } @Override public Plan withGroupExprLogicalPropChildren(Optional groupExpression, Optional logicalProperties, List children) { return new LogicalFileScan(relationId, (ExternalTable) table, qualifier, - groupExpression, logicalProperties, selectedPartitions, tableSample, tableSnapshot); + groupExpression, logicalProperties, selectedPartitions, tableSample, tableSnapshot, + operativeSlots); } public LogicalFileScan withSelectedPartitions(SelectedPartitions selectedPartitions) { return new LogicalFileScan(relationId, (ExternalTable) table, qualifier, Optional.empty(), - Optional.of(getLogicalProperties()), selectedPartitions, tableSample, tableSnapshot); + Optional.of(getLogicalProperties()), selectedPartitions, tableSample, tableSnapshot, + operativeSlots); } @Override public LogicalFileScan withRelationId(RelationId relationId) { return new LogicalFileScan(relationId, (ExternalTable) table, qualifier, Optional.empty(), - Optional.empty(), selectedPartitions, tableSample, tableSnapshot); + Optional.empty(), selectedPartitions, tableSample, tableSnapshot, + operativeSlots); } @Override @@ -179,4 +187,17 @@ public int hashCode() { return Objects.hash(selectedPartitions, isPruned); } } + + @Override + public LogicalFileScan withOperativeSlots(Collection operativeSlots) { + return new LogicalFileScan(relationId, (ExternalTable) table, qualifier, + groupExpression, Optional.of(getLogicalProperties()), + selectedPartitions, tableSample, tableSnapshot, + operativeSlots); + } + + @Override + public List getOperativeSlots() { + return operativeSlots; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java index b742142093bdf1..a0bf5805ada889 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java @@ -72,9 +72,10 @@ protected LogicalHudiScan(RelationId id, ExternalTable table, List quali Optional groupExpression, Optional logicalProperties, SelectedPartitions selectedPartitions, Optional tableSample, Optional tableSnapshot, - Optional scanParams, Optional incrementalRelation) { + Optional scanParams, Optional incrementalRelation, + List operativeSlots) { super(id, table, qualifier, groupExpression, logicalProperties, - selectedPartitions, tableSample, tableSnapshot); + selectedPartitions, tableSample, tableSnapshot, operativeSlots); Objects.requireNonNull(scanParams, "scanParams should not null"); Objects.requireNonNull(incrementalRelation, "incrementalRelation should not null"); this.scanParams = scanParams; @@ -82,10 +83,11 @@ protected LogicalHudiScan(RelationId id, ExternalTable table, List quali } public LogicalHudiScan(RelationId id, ExternalTable table, List qualifier, - Optional tableSample, Optional tableSnapshot) { + Optional tableSample, Optional tableSnapshot, List operativeSlots) { this(id, table, qualifier, Optional.empty(), Optional.empty(), ((HMSExternalTable) table).initHudiSelectedPartitions(tableSnapshot), tableSample, tableSnapshot, - Optional.empty(), Optional.empty()); + Optional.empty(), Optional.empty(), + operativeSlots); } public Optional getScanParams() { @@ -136,7 +138,7 @@ public String toString() { public LogicalHudiScan withGroupExpression(Optional groupExpression) { return new LogicalHudiScan(relationId, (ExternalTable) table, qualifier, groupExpression, Optional.of(getLogicalProperties()), selectedPartitions, tableSample, tableSnapshot, - scanParams, incrementalRelation); + scanParams, incrementalRelation, operativeSlots); } @Override @@ -144,20 +146,20 @@ public Plan withGroupExprLogicalPropChildren(Optional groupExpr Optional 
logicalProperties, List children) { return new LogicalHudiScan(relationId, (ExternalTable) table, qualifier, groupExpression, logicalProperties, selectedPartitions, tableSample, tableSnapshot, - scanParams, incrementalRelation); + scanParams, incrementalRelation, operativeSlots); } public LogicalHudiScan withSelectedPartitions(SelectedPartitions selectedPartitions) { return new LogicalHudiScan(relationId, (ExternalTable) table, qualifier, Optional.empty(), Optional.of(getLogicalProperties()), selectedPartitions, tableSample, tableSnapshot, - scanParams, incrementalRelation); + scanParams, incrementalRelation, operativeSlots); } @Override public LogicalHudiScan withRelationId(RelationId relationId) { return new LogicalHudiScan(relationId, (ExternalTable) table, qualifier, Optional.empty(), Optional.empty(), selectedPartitions, tableSample, tableSnapshot, - scanParams, incrementalRelation); + scanParams, incrementalRelation, operativeSlots); } @Override @@ -216,6 +218,6 @@ public LogicalHudiScan withScanParams(HMSExternalTable table, TableScanParams sc newScanParams = Optional.ofNullable(scanParams); return new LogicalHudiScan(relationId, table, qualifier, Optional.empty(), Optional.empty(), selectedPartitions, tableSample, tableSnapshot, - newScanParams, newIncrementalRelation); + newScanParams, newIncrementalRelation, operativeSlots); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalCatalogRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalCatalogRelation.java index 34970b25a0cd83..22018e9732973c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalCatalogRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalCatalogRelation.java @@ -42,6 +42,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; +import java.util.Collection; import java.util.List; 
import java.util.Objects; import java.util.Optional; @@ -54,6 +55,7 @@ public abstract class PhysicalCatalogRelation extends PhysicalRelation implement protected final TableIf table; protected final ImmutableList qualifier; + protected final ImmutableList operativeSlots; /** * Constructor for PhysicalCatalogRelation. @@ -62,10 +64,13 @@ public abstract class PhysicalCatalogRelation extends PhysicalRelation implement * @param qualifier qualified relation name */ public PhysicalCatalogRelation(RelationId relationId, PlanType type, TableIf table, List qualifier, - Optional groupExpression, LogicalProperties logicalProperties) { + Optional groupExpression, LogicalProperties logicalProperties, + Collection operativeSlots) { super(relationId, type, groupExpression, logicalProperties); this.table = Objects.requireNonNull(table, "table can not be null"); this.qualifier = ImmutableList.copyOf(Objects.requireNonNull(qualifier, "qualifier can not be null")); + this.operativeSlots = ImmutableList.copyOf(Objects.requireNonNull(operativeSlots, + "operativeSlots can not be null")); } /** @@ -77,10 +82,13 @@ public PhysicalCatalogRelation(RelationId relationId, PlanType type, TableIf tab public PhysicalCatalogRelation(RelationId relationId, PlanType type, TableIf table, List qualifier, Optional groupExpression, LogicalProperties logicalProperties, PhysicalProperties physicalProperties, - Statistics statistics) { + Statistics statistics, + Collection operativeSlots) { super(relationId, type, groupExpression, logicalProperties, physicalProperties, statistics); this.table = Objects.requireNonNull(table, "table can not be null"); this.qualifier = ImmutableList.copyOf(Objects.requireNonNull(qualifier, "qualifier can not be null")); + this.operativeSlots = ImmutableList.copyOf(Objects.requireNonNull(operativeSlots, + "operativeSlots can not be null")); } @Override @@ -119,6 +127,7 @@ public List computeOutput() { .collect(ImmutableList.toImmutableList()); } + @Override public List 
getQualifier() { return qualifier; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalDeferMaterializeOlapScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalDeferMaterializeOlapScan.java index f82bd6dbec5342..fdf88955fec330 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalDeferMaterializeOlapScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalDeferMaterializeOlapScan.java @@ -29,6 +29,8 @@ import org.apache.doris.nereids.util.Utils; import org.apache.doris.statistics.Statistics; +import com.google.common.collect.ImmutableList; + import java.util.List; import java.util.Objects; import java.util.Optional; @@ -62,7 +64,8 @@ public PhysicalDeferMaterializeOlapScan(PhysicalOlapScan physicalOlapScan, PhysicalProperties physicalProperties, Statistics statistics) { super(physicalOlapScan.getRelationId(), physicalOlapScan.getType(), physicalOlapScan.getTable(), physicalOlapScan.getQualifier(), - groupExpression, logicalProperties, physicalProperties, statistics); + groupExpression, logicalProperties, physicalProperties, statistics, + ImmutableList.of()); this.physicalOlapScan = physicalOlapScan; this.deferMaterializeSlotIds = deferMaterializeSlotIds; this.columnIdSlot = columnIdSlot; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalEsScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalEsScan.java index 8c6d348e6fb8ab..78939714cce30c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalEsScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalEsScan.java @@ -29,6 +29,8 @@ import org.apache.doris.nereids.util.Utils; import org.apache.doris.statistics.Statistics; +import com.google.common.collect.ImmutableList; + import java.util.List; import 
java.util.Optional; @@ -45,7 +47,7 @@ public class PhysicalEsScan extends PhysicalCatalogRelation { public PhysicalEsScan(RelationId id, TableIf table, List qualifier, DistributionSpec distributionSpec, Optional groupExpression, LogicalProperties logicalProperties) { - super(id, PlanType.PHYSICAL_ES_SCAN, table, qualifier, groupExpression, logicalProperties); + super(id, PlanType.PHYSICAL_ES_SCAN, table, qualifier, groupExpression, logicalProperties, ImmutableList.of()); this.distributionSpec = distributionSpec; } @@ -56,7 +58,7 @@ public PhysicalEsScan(RelationId id, TableIf table, List qualifier, DistributionSpec distributionSpec, Optional groupExpression, LogicalProperties logicalProperties, PhysicalProperties physicalProperties, Statistics statistics) { super(id, PlanType.PHYSICAL_ES_SCAN, table, qualifier, groupExpression, logicalProperties, - physicalProperties, statistics); + physicalProperties, statistics, ImmutableList.of()); this.distributionSpec = distributionSpec; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalFileScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalFileScan.java index d1a0d9c6c4a7f2..9e1195e3f8b4b6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalFileScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalFileScan.java @@ -24,6 +24,7 @@ import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.properties.PhysicalProperties; import org.apache.doris.nereids.trees.TableSample; +import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.PlanType; import org.apache.doris.nereids.trees.plans.RelationId; @@ -32,6 +33,7 @@ import org.apache.doris.nereids.util.Utils; import org.apache.doris.statistics.Statistics; +import java.util.Collection; import 
java.util.List; import java.util.Optional; @@ -52,9 +54,9 @@ public PhysicalFileScan(RelationId id, ExternalTable table, List qualifi DistributionSpec distributionSpec, Optional groupExpression, LogicalProperties logicalProperties, SelectedPartitions selectedPartitions, Optional tableSample, - Optional tableSnapshot) { + Optional tableSnapshot, Collection operativeSlots) { this(id, PlanType.PHYSICAL_FILE_SCAN, table, qualifier, distributionSpec, groupExpression, - logicalProperties, selectedPartitions, tableSample, tableSnapshot); + logicalProperties, selectedPartitions, tableSample, tableSnapshot, operativeSlots); } /** @@ -64,9 +66,11 @@ public PhysicalFileScan(RelationId id, ExternalTable table, List qualifi DistributionSpec distributionSpec, Optional groupExpression, LogicalProperties logicalProperties, PhysicalProperties physicalProperties, Statistics statistics, SelectedPartitions selectedPartitions, - Optional tableSample, Optional tableSnapshot) { + Optional tableSample, Optional tableSnapshot, + Collection operativeSlots) { this(id, PlanType.PHYSICAL_FILE_SCAN, table, qualifier, distributionSpec, groupExpression, - logicalProperties, physicalProperties, statistics, selectedPartitions, tableSample, tableSnapshot); + logicalProperties, physicalProperties, statistics, selectedPartitions, tableSample, tableSnapshot, + operativeSlots); } /** @@ -76,8 +80,8 @@ protected PhysicalFileScan(RelationId id, PlanType type, ExternalTable table, Li DistributionSpec distributionSpec, Optional groupExpression, LogicalProperties logicalProperties, SelectedPartitions selectedPartitions, Optional tableSample, - Optional tableSnapshot) { - super(id, type, table, qualifier, groupExpression, logicalProperties); + Optional tableSnapshot, Collection operativeSlots) { + super(id, type, table, qualifier, groupExpression, logicalProperties, operativeSlots); this.distributionSpec = distributionSpec; this.selectedPartitions = selectedPartitions; this.tableSample = tableSample; @@ 
-86,11 +90,12 @@ protected PhysicalFileScan(RelationId id, PlanType type, ExternalTable table, Li protected PhysicalFileScan(RelationId id, PlanType type, ExternalTable table, List qualifier, DistributionSpec distributionSpec, Optional groupExpression, - LogicalProperties logicalProperties, PhysicalProperties physicalProperties, - Statistics statistics, SelectedPartitions selectedPartitions, - Optional tableSample, Optional tableSnapshot) { + LogicalProperties logicalProperties, + PhysicalProperties physicalProperties, Statistics statistics, + SelectedPartitions selectedPartitions, Optional tableSample, + Optional tableSnapshot, Collection operativeSlots) { super(id, type, table, qualifier, groupExpression, logicalProperties, - physicalProperties, statistics); + physicalProperties, statistics, operativeSlots); this.distributionSpec = distributionSpec; this.selectedPartitions = selectedPartitions; this.tableSample = tableSample; @@ -119,7 +124,8 @@ public String toString() { "stats", statistics, "qualified", Utils.qualifiedName(qualifier, table.getName()), "selected partitions num", - selectedPartitions.isPruned ? selectedPartitions.selectedPartitions.size() : "unknown" + selectedPartitions.isPruned ? 
selectedPartitions.selectedPartitions.size() : "unknown", + "operativeCols", getOperativeSlots() ); } @@ -131,14 +137,16 @@ public R accept(PlanVisitor visitor, C context) { @Override public PhysicalFileScan withGroupExpression(Optional groupExpression) { return new PhysicalFileScan(relationId, getTable(), qualifier, distributionSpec, - groupExpression, getLogicalProperties(), selectedPartitions, tableSample, tableSnapshot); + groupExpression, getLogicalProperties(), selectedPartitions, tableSample, tableSnapshot, + operativeSlots); } @Override public Plan withGroupExprLogicalPropChildren(Optional groupExpression, Optional logicalProperties, List children) { return new PhysicalFileScan(relationId, getTable(), qualifier, distributionSpec, - groupExpression, logicalProperties.get(), selectedPartitions, tableSample, tableSnapshot); + groupExpression, logicalProperties.get(), selectedPartitions, tableSample, tableSnapshot, + operativeSlots); } @Override @@ -151,6 +159,12 @@ public PhysicalFileScan withPhysicalPropertiesAndStats(PhysicalProperties physic Statistics statistics) { return new PhysicalFileScan(relationId, getTable(), qualifier, distributionSpec, groupExpression, getLogicalProperties(), physicalProperties, statistics, - selectedPartitions, tableSample, tableSnapshot); + selectedPartitions, tableSample, tableSnapshot, + operativeSlots); + } + + @Override + public List getOperativeSlots() { + return operativeSlots; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHudiScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHudiScan.java index 451c57233920f6..28497a39ad3ee1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHudiScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHudiScan.java @@ -26,6 +26,7 @@ import org.apache.doris.nereids.properties.LogicalProperties; import 
org.apache.doris.nereids.properties.PhysicalProperties; import org.apache.doris.nereids.trees.TableSample; +import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.PlanType; import org.apache.doris.nereids.trees.plans.RelationId; @@ -34,6 +35,7 @@ import org.apache.doris.nereids.util.Utils; import org.apache.doris.statistics.Statistics; +import java.util.Collection; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -55,9 +57,10 @@ public PhysicalHudiScan(RelationId id, ExternalTable table, List qualifi LogicalProperties logicalProperties, SelectedPartitions selectedPartitions, Optional tableSample, Optional tableSnapshot, - Optional scanParams, Optional incrementalRelation) { + Optional scanParams, Optional incrementalRelation, + Collection operativeSlots) { super(id, PlanType.PHYSICAL_HUDI_SCAN, table, qualifier, distributionSpec, groupExpression, logicalProperties, - selectedPartitions, tableSample, tableSnapshot); + selectedPartitions, tableSample, tableSnapshot, operativeSlots); Objects.requireNonNull(scanParams, "scanParams should not null"); Objects.requireNonNull(incrementalRelation, "incrementalRelation should not null"); this.scanParams = scanParams; @@ -72,9 +75,10 @@ public PhysicalHudiScan(RelationId id, ExternalTable table, List qualifi LogicalProperties logicalProperties, PhysicalProperties physicalProperties, Statistics statistics, SelectedPartitions selectedPartitions, Optional tableSample, Optional tableSnapshot, - Optional scanParams, Optional incrementalRelation) { + Optional scanParams, Optional incrementalRelation, + Collection operativeSlots) { super(id, PlanType.PHYSICAL_HUDI_SCAN, table, qualifier, distributionSpec, groupExpression, logicalProperties, - physicalProperties, statistics, selectedPartitions, tableSample, tableSnapshot); + physicalProperties, statistics, selectedPartitions, tableSample, tableSnapshot, 
operativeSlots); this.scanParams = scanParams; this.incrementalRelation = incrementalRelation; } @@ -91,7 +95,7 @@ public Optional getIncrementalRelation() { public PhysicalHudiScan withGroupExpression(Optional groupExpression) { return new PhysicalHudiScan(relationId, getTable(), qualifier, distributionSpec, groupExpression, getLogicalProperties(), selectedPartitions, tableSample, tableSnapshot, - scanParams, incrementalRelation); + scanParams, incrementalRelation, operativeSlots); } @Override @@ -99,7 +103,7 @@ public Plan withGroupExprLogicalPropChildren(Optional groupExpr Optional logicalProperties, List children) { return new PhysicalHudiScan(relationId, getTable(), qualifier, distributionSpec, groupExpression, logicalProperties.get(), selectedPartitions, tableSample, tableSnapshot, - scanParams, incrementalRelation); + scanParams, incrementalRelation, operativeSlots); } @Override @@ -108,7 +112,7 @@ public PhysicalHudiScan withPhysicalPropertiesAndStats(PhysicalProperties physic return new PhysicalHudiScan(relationId, getTable(), qualifier, distributionSpec, groupExpression, getLogicalProperties(), physicalProperties, statistics, selectedPartitions, tableSample, tableSnapshot, - scanParams, incrementalRelation); + scanParams, incrementalRelation, operativeSlots); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalJdbcScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalJdbcScan.java index 7923c8af9f41e6..6054bd3b76800d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalJdbcScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalJdbcScan.java @@ -21,6 +21,7 @@ import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.properties.PhysicalProperties; +import org.apache.doris.nereids.trees.expressions.Slot; 
import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.PlanType; import org.apache.doris.nereids.trees.plans.RelationId; @@ -28,6 +29,9 @@ import org.apache.doris.nereids.util.Utils; import org.apache.doris.statistics.Statistics; +import com.google.common.collect.ImmutableList; + +import java.util.Collection; import java.util.List; import java.util.Optional; @@ -42,7 +46,7 @@ public class PhysicalJdbcScan extends PhysicalCatalogRelation { public PhysicalJdbcScan(RelationId id, TableIf table, List qualifier, Optional groupExpression, LogicalProperties logicalProperties) { this(id, table, qualifier, groupExpression, logicalProperties, - null, null); + null, null, ImmutableList.of()); } /** @@ -50,9 +54,10 @@ public PhysicalJdbcScan(RelationId id, TableIf table, List qualifier, */ public PhysicalJdbcScan(RelationId id, TableIf table, List qualifier, Optional groupExpression, - LogicalProperties logicalProperties, PhysicalProperties physicalProperties, Statistics statistics) { + LogicalProperties logicalProperties, PhysicalProperties physicalProperties, Statistics statistics, + Collection operativeSlots) { super(id, PlanType.PHYSICAL_JDBC_SCAN, table, qualifier, groupExpression, - logicalProperties, physicalProperties, statistics); + logicalProperties, physicalProperties, statistics, operativeSlots); } @Override @@ -84,6 +89,6 @@ public Plan withGroupExprLogicalPropChildren(Optional groupExpr public PhysicalJdbcScan withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties, Statistics statistics) { return new PhysicalJdbcScan(relationId, table, qualifier, groupExpression, - getLogicalProperties(), physicalProperties, statistics); + getLogicalProperties(), physicalProperties, statistics, operativeSlots); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalLazyMaterialize.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalLazyMaterialize.java 
new file mode 100644
index 00000000000000..546c440c3c2389
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalLazyMaterialize.java
@@ -0,0 +1,267 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.plans.physical;
+
+import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.TableIf;
+import org.apache.doris.nereids.memo.GroupExpression;
+import org.apache.doris.nereids.processor.post.materialize.MaterializeSource;
+import org.apache.doris.nereids.properties.DataTrait.Builder;
+import org.apache.doris.nereids.properties.LogicalProperties;
+import org.apache.doris.nereids.properties.PhysicalProperties;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
+import org.apache.doris.nereids.trees.plans.Plan;
+import org.apache.doris.nereids.trees.plans.PlanType;
+import org.apache.doris.nereids.trees.plans.algebra.CatalogRelation;
+import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
+import org.apache.doris.nereids.util.ExpressionUtils;
+import org.apache.doris.statistics.Statistics;
+
+import com.google.common.collect.BiMap;
+import com.google.common.collect.ImmutableList;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+/**
+ * Physical plan node that performs lazy (deferred) materialization.
+ *
+ * <p>{@code materializeInput} holds the already materialized slots followed by one row-id
+ * slot per lazily materialized relation. The output of this node is the materialized slots
+ * followed by the lazy slots of each relation, in the order of the row-id slots.
+ */
+public class PhysicalLazyMaterialize<CHILD_TYPE extends Plan> extends PhysicalUnary<CHILD_TYPE> {
+
+    private final Map<CatalogRelation, List<Slot>> relationToLazySlotMap;
+
+    private final BiMap<CatalogRelation, SlotReference> relationToRowId;
+
+    private final Map<Slot, MaterializeSource> materializeMap;
+
+    private final List<Slot> materializedSlots;
+
+    private final List<Slot> materializeInput;
+    private final List<Slot> materializeOutput;
+    // used for BE
+    private final List<Slot> rowIdList;
+    // one inner list per row-id slot, index-aligned with rowIdList
+    private final List<List<Column>> lazyColumns = new ArrayList<>();
+    private final List<List<Integer>> lazySlotLocations = new ArrayList<>();
+    private final List<List<Integer>> lazyTableIdxs = new ArrayList<>();
+
+    private final List<CatalogRelation> relations;
+
+    /**
+     * Creates the node without physical properties and statistics.
+     *
+     * @param child child plan
+     * @param materializeInput materialized slots first, then one row-id slot per relation
+     * @param materializedSlots slots that are already materialized
+     * @param relationToLazySlotMap lazy slots of each relation
+     * @param relationToRowId row-id slot of each relation
+     * @param materializeMap materialize source of each lazy slot
+     */
+    public PhysicalLazyMaterialize(CHILD_TYPE child,
+            List<Slot> materializeInput,
+            List<Slot> materializedSlots,
+            Map<CatalogRelation, List<Slot>> relationToLazySlotMap,
+            BiMap<CatalogRelation, SlotReference> relationToRowId,
+            Map<Slot, MaterializeSource> materializeMap) {
+        this(child, materializeInput, materializedSlots, relationToLazySlotMap,
+                relationToRowId, materializeMap, null, null);
+    }
+
+    /**
+     * Creates the node and derives its output slots and, for every row-id slot, the lazy
+     * columns, their column indexes in the table and their positions in the output.
+     *
+     * @throws IllegalStateException if a row-id slot has no relation in {@code relationToRowId}
+     */
+    public PhysicalLazyMaterialize(CHILD_TYPE child,
+            List<Slot> materializeInput,
+            List<Slot> materializedSlots,
+            Map<CatalogRelation, List<Slot>> relationToLazySlotMap,
+            BiMap<CatalogRelation, SlotReference> relationToRowId,
+            Map<Slot, MaterializeSource> materializeMap,
+            PhysicalProperties physicalProperties, Statistics statistics) {
+        super(PlanType.PHYSICAL_MATERIALIZE, Optional.empty(),
+                null, physicalProperties, statistics, child);
+        this.materializeInput = materializeInput;
+        this.relationToLazySlotMap = relationToLazySlotMap;
+        this.relationToRowId = relationToRowId;
+        this.materializedSlots = ImmutableList.copyOf(materializedSlots);
+        this.relations = ImmutableList.copyOf(relationToRowId.keySet());
+        this.materializeMap = materializeMap;
+
+        ImmutableList.Builder<Slot> outputBuilder = ImmutableList.builder();
+        outputBuilder.addAll(materializedSlots);
+        ImmutableList.Builder<Slot> rowIdListBuilder = ImmutableList.builder();
+        // row-id slots come after the materialized slots in materializeInput; lazy slots are
+        // appended to the output after the materialized slots, so locations start at that size
+        int loc = materializedSlots.size();
+        for (int idx = materializedSlots.size(); idx < materializeInput.size(); idx++) {
+            Slot rowId = materializeInput.get(idx);
+            rowIdListBuilder.add(rowId);
+            CatalogRelation rel = relationToRowId.inverse().get(rowId);
+            if (rel == null) {
+                throw new IllegalStateException("no relation found for row id slot: " + rowId);
+            }
+            TableIf relationTable = rel.getTable();
+
+            List<Column> lazyColumnForRel = new ArrayList<>();
+            lazyColumns.add(lazyColumnForRel);
+            List<Integer> lazyIdxForRel = new ArrayList<>();
+            lazyTableIdxs.add(lazyIdxForRel);
+            List<Integer> lazySlotLocationForRel = new ArrayList<>();
+            lazySlotLocations.add(lazySlotLocationForRel);
+            for (Slot lazySlot : relationToLazySlotMap.get(rel)) {
+                outputBuilder.add(lazySlot);
+                lazyColumnForRel.add(materializeMap.get(lazySlot).baseSlot.getOriginalColumn().get());
+                lazyIdxForRel.add(relationTable.getBaseColumnIdxByName(lazySlot.getName()));
+                lazySlotLocationForRel.add(loc);
+                loc++;
+            }
+        }
+        rowIdList = rowIdListBuilder.build();
+        this.materializeOutput = outputBuilder.build();
+    }
+
+    @Override
+    public <R, C> R accept(PlanVisitor<R, C> visitor, C context) {
+        return visitor.visitPhysicalLazyMaterialize(this, context);
+    }
+
+    @Override
+    public List<? extends Expression> getExpressions() {
+        return materializedSlots;
+    }
+
+    @Override
+    public List<Slot> computeOutput() {
+        return materializeOutput;
+    }
+
+    /**
+     * Always returns null; the constructors always pass an empty group expression.
+     */
+    @Override
+    public Plan withGroupExpression(Optional<GroupExpression> groupExpression) {
+        return null;
+    }
+
+    /**
+     * Always returns null, like {@link #withGroupExpression(Optional)}.
+     */
+    @Override
+    public Plan withGroupExprLogicalPropChildren(Optional<GroupExpression> groupExpression,
+            Optional<LogicalProperties> logicalProperties, List<Plan> children) {
+        return null;
+    }
+
+    @Override
+    public void computeUnique(Builder builder) {
+        // nothing to compute
+    }
+
+    @Override
+    public void computeUniform(Builder builder) {
+        // nothing to compute
+    }
+
+    @Override
+    public void computeEqualSet(Builder builder) {
+        // nothing to compute
+    }
+
+    @Override
+    public void computeFd(Builder builder) {
+        // nothing to compute
+    }
+
+    /**
+     * Rebuilds the node on a new child, keeping the physical properties and statistics
+     * of this node.
+     */
+    @Override
+    public Plan withChildren(List<Plan> children) {
+        return new PhysicalLazyMaterialize<>(children.get(0),
+                materializeInput, materializedSlots, relationToLazySlotMap,
+                relationToRowId, materializeMap, getPhysicalProperties(), getStats());
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder builder = new StringBuilder();
+        builder.append("PhysicalLazyMaterialize [Output= (")
+                .append(getOutput()).append("), lazySlots= (");
+        for (List<Slot> lazySlots : relationToLazySlotMap.values()) {
+            builder.append(lazySlots);
+        }
+        builder.append(")]");
+        return builder.toString();
+    }
+
+    @Override
+    public PhysicalPlan withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties, Statistics statistics) {
+        return new PhysicalLazyMaterialize<>(children.get(0), materializeInput, materializedSlots,
+                relationToLazySlotMap, relationToRowId, materializeMap, physicalProperties, statistics);
+    }
+
+    @Override
+    public String shapeInfo() {
+        // lazy slots of all relations, sorted by their shape info
+        List<Slot> lazySlots = relationToLazySlotMap.values().stream()
+                .flatMap(List::stream)
+                .sorted(Comparator.comparing(Slot::shapeInfo))
+                .collect(Collectors.toList());
+        StringBuilder shapeBuilder = new StringBuilder();
+        shapeBuilder.append(this.getClass().getSimpleName())
+                .append("[").append("materializedSlots:")
+                .append(ExpressionUtils.slotListShapeInfo(materializedSlots))
+                .append(" lazySlots:")
+                .append(ExpressionUtils.slotListShapeInfo(lazySlots));
+        shapeBuilder.append("]");
+        return shapeBuilder.toString();
+    }
+
+    public List<CatalogRelation> getRelations() {
+        return relations;
+    }
+
+    public List<List<Column>> getLazyColumns() {
+        return lazyColumns;
+    }
+
+    public List<List<Integer>> getLazySlotLocations() {
+        return lazySlotLocations;
+    }
+
+    public List<List<Integer>> getlazyTableIdxs() {
+        return lazyTableIdxs;
+    }
+
+    public List<Slot> getRowIds() {
+        return rowIdList;
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalLazyMaterializeFileScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalLazyMaterializeFileScan.java
new file mode 100644
index 00000000000000..1465872a0f198c
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalLazyMaterializeFileScan.java
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.plans.physical;
+
+import org.apache.doris.nereids.properties.PhysicalProperties;
+import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
+import org.apache.doris.statistics.Statistics;
+
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Wrapper of a {@link PhysicalFileScan} used for lazy materialization: it outputs the
+ * operative slots of the wrapped scan followed by the row-id slot.
+ */
+public class PhysicalLazyMaterializeFileScan extends PhysicalFileScan {
+    private final PhysicalFileScan scan;
+    private final SlotReference rowId;
+    private final List<Slot> lazySlots;
+    // built on first use by computeOutput()
+    private List<Slot> output;
+
+    /**
+     * Wraps {@code scan}, reusing its physical properties and statistics.
+     *
+     * @param scan the file scan to wrap
+     * @param rowId row-id slot appended to the scan output
+     * @param lazySlots lazy slots recorded for this scan
+     */
+    public PhysicalLazyMaterializeFileScan(PhysicalFileScan scan, SlotReference rowId, List<Slot> lazySlots) {
+        this(scan, rowId, lazySlots, scan.getPhysicalProperties(), scan.getStats());
+    }
+
+    private PhysicalLazyMaterializeFileScan(PhysicalFileScan scan, SlotReference rowId, List<Slot> lazySlots,
+            PhysicalProperties physicalProperties, Statistics statistics) {
+        super(scan.getRelationId(), scan.getTable(), scan.getQualifier(), scan.getDistributionSpec(),
+                Optional.empty(), null, physicalProperties, statistics,
+                scan.selectedPartitions, scan.getTableSample(),
+                scan.getTableSnapshot(), scan.getOperativeSlots());
+        this.scan = scan;
+        this.rowId = rowId;
+        this.lazySlots = ImmutableList.copyOf(lazySlots);
+    }
+
+    @Override
+    public List<String> getQualifier() {
+        return scan.getQualifier();
+    }
+
+    @Override
+    public List<Slot> computeOutput() {
+        if (output == null) {
+            output = ImmutableList.<Slot>builder()
+                    .addAll(scan.getOperativeSlots())
+                    .add(rowId).build();
+        }
+        return output;
+    }
+
+    /**
+     * Keeps the wrapper (and therefore the row-id slot in the output) when the physical
+     * properties and statistics are replaced.
+     */
+    @Override
+    public PhysicalLazyMaterializeFileScan withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
+            Statistics statistics) {
+        return new PhysicalLazyMaterializeFileScan(scan, rowId, lazySlots, physicalProperties, statistics);
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append("PhysicalLazyMaterializeFileScan[")
+                .append(scan.toString());
+
+        getAppliedRuntimeFilters().forEach(rf -> sb.append(" RF").append(rf.getId().asInt()));
+        sb.append("]");
+        return sb.toString();
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalLazyMaterializeOlapScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalLazyMaterializeOlapScan.java
new file mode 100644
index 00000000000000..add4742099bb4c
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalLazyMaterializeOlapScan.java
@@ -0,0 +1,134 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.plans.physical;
+
+import org.apache.doris.nereids.properties.PhysicalProperties;
+import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
+import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
+import org.apache.doris.nereids.util.ExpressionUtils;
+import org.apache.doris.statistics.Statistics;
+
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * Wrapper of a {@link PhysicalOlapScan} used for lazy materialization: it outputs the
+ * operative slots of the wrapped scan followed by the row-id slot.
+ */
+public class PhysicalLazyMaterializeOlapScan extends PhysicalOlapScan {
+
+    private final PhysicalOlapScan scan;
+    private final SlotReference rowId;
+    private final List<Slot> lazySlots;
+    // built on first use by computeOutput()
+    private List<Slot> output;
+
+    /**
+     * Wraps {@code physicalOlapScan}, reusing its physical properties and statistics.
+     *
+     * @param physicalOlapScan the olap scan to wrap
+     * @param rowId row-id slot appended to the scan output
+     * @param lazySlots lazy slots recorded for this scan, printed by {@link #shapeInfo()}
+     */
+    public PhysicalLazyMaterializeOlapScan(PhysicalOlapScan physicalOlapScan,
+            SlotReference rowId, List<Slot> lazySlots) {
+        this(physicalOlapScan, rowId, lazySlots,
+                physicalOlapScan.getPhysicalProperties(), physicalOlapScan.getStats());
+    }
+
+    private PhysicalLazyMaterializeOlapScan(PhysicalOlapScan physicalOlapScan,
+            SlotReference rowId, List<Slot> lazySlots,
+            PhysicalProperties physicalProperties, Statistics statistics) {
+        super(physicalOlapScan.getRelationId(), physicalOlapScan.getTable(), physicalOlapScan.getQualifier(),
+                physicalOlapScan.getSelectedIndexId(),
+                physicalOlapScan.getSelectedTabletIds(),
+                physicalOlapScan.getSelectedPartitionIds(),
+                physicalOlapScan.getDistributionSpec(),
+                physicalOlapScan.getPreAggStatus(),
+                physicalOlapScan.getBaseOutputs(),
+                physicalOlapScan.getGroupExpression(),
+                null,
+                physicalProperties,
+                statistics,
+                physicalOlapScan.getTableSample(),
+                physicalOlapScan.getOperativeSlots());
+        this.scan = physicalOlapScan;
+        this.rowId = rowId;
+        this.lazySlots = ImmutableList.copyOf(lazySlots);
+    }
+
+    @Override
+    public <R, C> R accept(PlanVisitor<R, C> visitor, C context) {
+        return visitor.visitPhysicalLazyMaterializeOlapScan(this, context);
+    }
+
+    @Override
+    public List<Slot> computeOutput() {
+        if (output == null) {
+            output = ImmutableList.<Slot>builder()
+                    .addAll(scan.getOperativeSlots())
+                    .add(rowId).build();
+        }
+        return output;
+    }
+
+    public PhysicalOlapScan getScan() {
+        return scan;
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append("PhysicalLazyMaterializeOlapScan[")
+                .append(scan.toString());
+
+        getAppliedRuntimeFilters().forEach(rf -> sb.append(" RF").append(rf.getId().asInt()));
+        sb.append("]");
+        return sb.toString();
+    }
+
+    @Override
+    public String shapeInfo() {
+        StringBuilder shapeBuilder = new StringBuilder();
+        shapeBuilder.append(this.getClass().getSimpleName())
+                .append("[").append(scan.table.getName()).append(" lazySlots:")
+                .append(ExpressionUtils.slotListShapeInfo(lazySlots))
+                .append("]");
+        if (!getAppliedRuntimeFilters().isEmpty()) {
+            shapeBuilder.append(" apply RFs:");
+            getAppliedRuntimeFilters().forEach(rf -> shapeBuilder.append(" RF").append(rf.getId().asInt()));
+        }
+        return shapeBuilder.toString();
+    }
+
+    /**
+     * Returns a copy of this wrapper, around the same scan, that carries the given
+     * physical properties and statistics.
+     */
+    @Override
+    public PhysicalLazyMaterializeOlapScan withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties,
+            Statistics statistics) {
+        return new PhysicalLazyMaterializeOlapScan(scan, rowId, lazySlots, physicalProperties, statistics);
+    }
+
+    public SlotReference getRowId() {
+        return rowId;
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOdbcScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOdbcScan.java index 6ae6b931120d82..79774e7604183a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOdbcScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOdbcScan.java @@ -21,6 +21,7 @@ import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.properties.PhysicalProperties; +import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.PlanType; import org.apache.doris.nereids.trees.plans.RelationId; @@ -28,6 +29,9 @@ import 
org.apache.doris.nereids.util.Utils; import org.apache.doris.statistics.Statistics; +import com.google.common.collect.ImmutableList; + +import java.util.Collection; import java.util.List; import java.util.Optional; @@ -41,7 +45,7 @@ public class PhysicalOdbcScan extends PhysicalCatalogRelation { public PhysicalOdbcScan(RelationId id, TableIf table, List qualifier, Optional groupExpression, LogicalProperties logicalProperties) { this(id, table, qualifier, groupExpression, logicalProperties, - null, null); + null, null, ImmutableList.of()); } /** @@ -49,9 +53,10 @@ public PhysicalOdbcScan(RelationId id, TableIf table, List qualifier, */ public PhysicalOdbcScan(RelationId id, TableIf table, List qualifier, Optional groupExpression, - LogicalProperties logicalProperties, PhysicalProperties physicalProperties, Statistics statistics) { + LogicalProperties logicalProperties, PhysicalProperties physicalProperties, Statistics statistics, + Collection operativeSlots) { super(id, PlanType.PHYSICAL_ODBC_SCAN, table, qualifier, groupExpression, - logicalProperties, physicalProperties, statistics); + logicalProperties, physicalProperties, statistics, operativeSlots); } @Override @@ -83,6 +88,6 @@ public Plan withGroupExprLogicalPropChildren(Optional groupExpr public PhysicalOdbcScan withPhysicalPropertiesAndStats(PhysicalProperties physicalProperties, Statistics statistics) { return new PhysicalOdbcScan(relationId, table, qualifier, groupExpression, - getLogicalProperties(), physicalProperties, statistics); + getLogicalProperties(), physicalProperties, statistics, operativeSlots); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java index 2783adbafe78f3..3948ebdbae5fe1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java @@ -82,7 +82,7 @@ public PhysicalOlapScan(RelationId id, OlapTable olapTable, List qualifi Optional tableSample, Collection operativeSlots) { super(id, PlanType.PHYSICAL_OLAP_SCAN, olapTable, qualifier, - groupExpression, logicalProperties, physicalProperties, statistics); + groupExpression, logicalProperties, physicalProperties, statistics, operativeSlots); this.selectedIndexId = selectedIndexId; this.selectedTabletIds = ImmutableList.copyOf(selectedTabletIds); this.selectedPartitionIds = ImmutableList.copyOf(selectedPartitionIds); @@ -248,4 +248,9 @@ public CatalogRelation withOperativeSlots(Collection operativeSlots) { groupExpression, getLogicalProperties(), getPhysicalProperties(), statistics, tableSample, operativeSlots); } + + @Override + public List getOperativeSlots() { + return operativeSlots; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalSchemaScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalSchemaScan.java index f9dd821f859cdc..9d8d16686c067f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalSchemaScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalSchemaScan.java @@ -28,6 +28,8 @@ import org.apache.doris.nereids.util.Utils; import org.apache.doris.statistics.Statistics; +import com.google.common.collect.ImmutableList; + import java.util.List; import java.util.Objects; import java.util.Optional; @@ -44,7 +46,8 @@ public class PhysicalSchemaScan extends PhysicalCatalogRelation { public PhysicalSchemaScan(RelationId id, TableIf table, List qualifier, Optional groupExpression, LogicalProperties logicalProperties, Optional schemaCatalog, Optional schemaDatabase, Optional schemaTable) { - super(id, PlanType.PHYSICAL_SCHEMA_SCAN, table, qualifier, groupExpression, logicalProperties); + 
super(id, PlanType.PHYSICAL_SCHEMA_SCAN, table, qualifier, groupExpression, logicalProperties, + ImmutableList.of()); this.schemaCatalog = schemaCatalog; this.schemaDatabase = schemaDatabase; this.schemaTable = schemaTable; @@ -55,7 +58,7 @@ public PhysicalSchemaScan(RelationId id, TableIf table, List qualifier, PhysicalProperties physicalProperties, Statistics statistics, Optional schemaCatalog, Optional schemaDatabase, Optional schemaTable) { super(id, PlanType.PHYSICAL_SCHEMA_SCAN, table, qualifier, groupExpression, - logicalProperties, physicalProperties, statistics); + logicalProperties, physicalProperties, statistics, ImmutableList.of()); this.schemaCatalog = schemaCatalog; this.schemaDatabase = schemaDatabase; this.schemaTable = schemaTable; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalStorageLayerAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalStorageLayerAggregate.java index 6637e02ca118dc..2385c0601e633f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalStorageLayerAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalStorageLayerAggregate.java @@ -29,6 +29,7 @@ import org.apache.doris.nereids.util.Utils; import org.apache.doris.statistics.Statistics; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.util.List; @@ -44,7 +45,7 @@ public class PhysicalStorageLayerAggregate extends PhysicalCatalogRelation { public PhysicalStorageLayerAggregate(PhysicalCatalogRelation relation, PushDownAggOp aggOp) { super(relation.getRelationId(), relation.getType(), relation.getTable(), relation.getQualifier(), - Optional.empty(), relation.getLogicalProperties()); + Optional.empty(), relation.getLogicalProperties(), ImmutableList.of()); this.relation = Objects.requireNonNull(relation, "relation cannot be null"); this.aggOp = 
Objects.requireNonNull(aggOp, "aggOp cannot be null"); } @@ -53,7 +54,7 @@ public PhysicalStorageLayerAggregate(PhysicalCatalogRelation relation, PushDownA Optional groupExpression, LogicalProperties logicalProperties, PhysicalProperties physicalProperties, Statistics statistics) { super(relation.getRelationId(), relation.getType(), relation.getTable(), relation.getQualifier(), - groupExpression, logicalProperties, physicalProperties, statistics); + groupExpression, logicalProperties, physicalProperties, statistics, ImmutableList.of()); this.relation = Objects.requireNonNull(relation, "relation cannot be null"); this.aggOp = Objects.requireNonNull(aggOp, "aggOp cannot be null"); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/DefaultPlanRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/DefaultPlanRewriter.java index 3a30191d2c020b..573de31a2131b0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/DefaultPlanRewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/DefaultPlanRewriter.java @@ -59,11 +59,10 @@ public static

P visitChildren(DefaultPlanRewriter rewrite } if (hasNewChildren) { + P originPlan = plan; plan = (P) plan.withChildren(newChildren.build()); - if (plan instanceof AbstractPhysicalPlan) { - AbstractPhysicalPlan physicalPlan = (AbstractPhysicalPlan) plan; - plan = (P) ((AbstractPhysicalPlan) physicalPlan.withChildren(newChildren.build())) - .copyStatsAndGroupIdFrom(physicalPlan); + if (originPlan instanceof AbstractPhysicalPlan) { + plan = (P) ((AbstractPhysicalPlan) plan).copyStatsAndGroupIdFrom((AbstractPhysicalPlan) originPlan); } } return plan; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java index 84091d6ce558ab..ebb84f692a4da0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java @@ -69,6 +69,9 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalHashAggregate; import org.apache.doris.nereids.trees.plans.physical.PhysicalHashJoin; import org.apache.doris.nereids.trees.plans.physical.PhysicalIntersect; +import org.apache.doris.nereids.trees.plans.physical.PhysicalLazyMaterialize; +import org.apache.doris.nereids.trees.plans.physical.PhysicalLazyMaterializeFileScan; +import org.apache.doris.nereids.trees.plans.physical.PhysicalLazyMaterializeOlapScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalLimit; import org.apache.doris.nereids.trees.plans.physical.PhysicalNestedLoopJoin; import org.apache.doris.nereids.trees.plans.physical.PhysicalPartitionTopN; @@ -271,6 +274,18 @@ public R visitLogicalDeferMaterializeTopN(LogicalDeferMaterializeTopN materialize, C context) { + return visit(materialize, context); + } + + public R visitPhysicalLazyMaterializeFileScan(PhysicalLazyMaterializeFileScan scan, C context) { + return visit(scan, context); + } + + public R 
visitPhysicalLazyMaterializeOlapScan(PhysicalLazyMaterializeOlapScan scan, C context) { + return visit(scan, context); + } + public R visitLogicalWindow(LogicalWindow window, C context) { return visit(window, context); }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ExpressionUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ExpressionUtils.java
index 910cf010cb1658..b483c5a2f02758 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ExpressionUtils.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ExpressionUtils.java
@@ -1111,4 +1111,21 @@ public Expression visitUnboundSlot(UnboundSlot unboundSlot, Void ctx) {
             return new StringLiteral(unboundSlot.getName());
         }
     }
+
+    /**
+     * Formats the shape info of the given slots as a parenthesized, comma separated list,
+     * i.e. {@code (s1,s2)} for two slots; an empty list is formatted as {@code ()}.
+     *
+     * @param materializedSlots slots to format, in list order
+     * @return the formatted list
+     */
+    public static String slotListShapeInfo(List<? extends Slot> materializedSlots) {
+        StringBuilder shapeBuilder = new StringBuilder("(");
+        String separator = "";
+        for (Slot slot : materializedSlots) {
+            shapeBuilder.append(separator).append(slot.shapeInfo());
+            separator = ",";
+        }
+        return shapeBuilder.append(")").toString();
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/MaterializationNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/MaterializationNode.java new file mode 100644 index 00000000000000..836a2879c1e211 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/MaterializationNode.java @@ -0,0 +1,210 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.planner; + +import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.TupleDescriptor; +import org.apache.doris.catalog.Column; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.resource.computegroup.ComputeGroup; +import org.apache.doris.statistics.StatisticalType; +import org.apache.doris.system.Backend; +import org.apache.doris.system.BeSelectionPolicy; +import org.apache.doris.thrift.TColumn; +import org.apache.doris.thrift.TExplainLevel; +import org.apache.doris.thrift.TMaterializationNode; +import org.apache.doris.thrift.TNodeInfo; +import org.apache.doris.thrift.TPaloNodesInfo; +import org.apache.doris.thrift.TPlanNode; +import org.apache.doris.thrift.TPlanNodeType; + +import java.util.ArrayList; +import java.util.List; + +/** + * struct TMaterializationNode { + * // A Materialization materializes all tuple + * // if the output of the child is [a, row_id1, b, row_id2], + * // row_id1 produces e, f + * // row_id2 produces c, d + * // then tuple_id = [a, e, f, b, c, d] + * 1: optional Types.TTupleId tuple_id + * + * // Nodes in this cluster, used for second phase fetch, i.e. the BE node information + * 2: optional Descriptors.TPaloNodesInfo nodes_info; + * + * // Separate list of expr for fetch data + * // list of the slotRefs of the row_id columns: [row_id1, row_id2] + * 3: optional list> fetch_expr_lists + * // Fetch schema + * //[[e, f],[c, d]], + * 4: optional list> column_descs_lists; + * + * // corresponds to column_descs_lists, slot_locs_lists=[[1, 2], [4, 5]], + * // where 1 means that e is slotDesc #1 of tuple_id, and f is slotDesc #2 of tuple_id + * 5: optional list> 
slot_locs_lists; + * + * // Whether fetch row store + * // FE decides whether to use the row-store optimization based on whether a row store exists, the ratio of lazily materialized columns to all columns, and the number of lazily materialized columns + * 6: optional list fetch_row_store; + * + * 7:bool do_gc = false; // true for the MaterializeNode closest to the root, false for the other MaterializeNodes + * + * similar to slot_locs_lists, but the numbers are the column indexes in the table + * 8: optional list> table_idx_lists; + * + * } + */ +public class MaterializationNode extends PlanNode { + private TPaloNodesInfo nodesInfo; + private TupleDescriptor materializeTupleDescriptor; + + private List rowIds; + + private List> lazyColumns; + + private List> locations; + private List> idxs; + + private List rowStoreFlags; + + private boolean isTopMaterializeNode; + + public MaterializationNode(PlanNodeId id, TupleDescriptor desc, PlanNode child) { + super(id, desc.getId().asList(), "MaterializeNode", StatisticalType.DEFAULT); + this.materializeTupleDescriptor = desc; + initNodeInfo(); + this.children.add(child); + } + + public void initNodeInfo() { + BeSelectionPolicy policy = new BeSelectionPolicy.Builder() + .needQueryAvailable() + .setRequireAliveBe() + .build(); + nodesInfo = new TPaloNodesInfo(); + ConnectContext context = ConnectContext.get(); + if (context == null) { + context = new ConnectContext(); + } + ComputeGroup computeGroup = context.getComputeGroupSafely(); + for (Backend backend : policy.getCandidateBackends(computeGroup.getBackendList())) { + nodesInfo.addToNodes(new TNodeInfo(backend.getId(), 0, backend.getHost(), backend.getBrpcPort())); + } + } + + @Override + public String getNodeExplainString(String detailPrefix, TExplainLevel detailLevel) { + StringBuilder output = new StringBuilder(); + output.append(detailPrefix) + .append("materialize tuple id:") + .append(materializeTupleDescriptor.getId()).append("\n"); + + if (!projectList.isEmpty()) { + output.append(detailPrefix) + .append("output tuple id:").append(outputTupleDesc.getId()).append("\n"); + + output.append(detailPrefix) + .append("projectList:").append(projectList.toString()).append("\n"); + } + 
output.append(detailPrefix).append("column_descs_lists").append(lazyColumns).append("\n"); + output.append(detailPrefix).append("locations: ").append(locations).append("\n"); + output.append(detailPrefix).append("table_idxs: ").append(idxs).append("\n"); + output.append(detailPrefix).append("row_ids: ").append(rowIds).append("\n"); + output.append(detailPrefix).append("isTopMaterializeNode: ").append(isTopMaterializeNode).append("\n"); + return output.toString(); + } + + @Override + protected void toThrift(TPlanNode msg) { + msg.node_type = TPlanNodeType.MATERIALIZATION_NODE; + msg.materialization_node = new TMaterializationNode(); + msg.materialization_node.setTupleId(tupleIds.get(0).asInt()); + msg.materialization_node.setIntermediateTupleId(materializeTupleDescriptor.getId().asInt()); + msg.materialization_node.setNodesInfo(nodesInfo); + msg.materialization_node.setFetchExprLists(Expr.treesToThrift(rowIds)); + + List> thriftCols = new ArrayList<>(); + for (List cols : lazyColumns) { + List array = new ArrayList<>(); + for (Column col : cols) { + array.add(col.toThrift()); + } + thriftCols.add(array); + } + msg.materialization_node.setColumnDescsLists(thriftCols); + + msg.materialization_node.setSlotLocsLists(locations); + msg.materialization_node.setColumnIdxsLists(idxs); + msg.materialization_node.setFetchRowStores(rowStoreFlags); + msg.materialization_node.setGcIdMap(isTopMaterializeNode); + } + + public void setRowIds(List rowIds) { + this.rowIds = rowIds; + } + + public void setLazyColumns(List> lazyColumns) { + this.lazyColumns = lazyColumns; + } + + public void setLocations(List> locations) { + this.locations = locations; + } + + public void setIdxs(List> idxs) { + this.idxs = idxs; + } + + public void setRowStoreFlags(List rowStoreFlags) { + this.rowStoreFlags = rowStoreFlags; + } + + public void setTopMaterializeNode(boolean topMaterializeNode) { + isTopMaterializeNode = topMaterializeNode; + } + + public TupleDescriptor getMaterializeTupleDescriptor() { 
+ return materializeTupleDescriptor; + } + + public List getRowIds() { + return rowIds; + } + + public List> getLazyColumns() { + return lazyColumns; + } + + public List> getLocations() { + return locations; + } + + public List getRowStoreFlags() { + return rowStoreFlags; + } + + public boolean isTopMaterializeNode() { + return isTopMaterializeNode; + } + + @Override + public boolean isSerialOperator() { + return true; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index a6fc01562ab095..9fc80b1563a26b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -215,6 +215,11 @@ public class OlapScanNode extends ScanNode { private final PartitionPruneV2ForShortCircuitPlan cachedPartitionPruner = new PartitionPruneV2ForShortCircuitPlan(); + private boolean isTopnLazyMaterialize = false; + private List topnLazyMaterializeOutputColumns = new ArrayList<>(); + + private Column globalRowIdColumn; + // Constructs node to scan given data files of table 'tbl'. 
public OlapScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) { super(id, desc, planNodeName, StatisticalType.OLAP_SCAN_NODE); @@ -1420,7 +1425,7 @@ public String getNodeExplainString(String prefix, TExplainLevel detailLevel) { output.append(prefix).append("rewrittenProjectList: ").append( getExplainString(rewrittenProjectList)).append("\n"); } - + output.append(prefix).append("desc: ").append(desc.getId().asInt()).append("\n"); return output.toString(); } @@ -1510,15 +1515,13 @@ protected void toThrift(TPlanNode msg) { List keyColumnNames = new ArrayList(); List keyColumnTypes = new ArrayList(); List columnsDesc = new ArrayList(); - olapTable.getColumnDesc(selectedIndexId, columnsDesc, keyColumnNames, keyColumnTypes); List indexDesc = Lists.newArrayList(); - - // Add extra row id column - ArrayList slots = desc.getSlots(); - Column lastColumn = slots.get(slots.size() - 1).getColumn(); - if (lastColumn != null && lastColumn.getName().equalsIgnoreCase(Column.ROWID_COL)) { - TColumn tColumn = new TColumn(); - tColumn.setColumnName(Column.ROWID_COL); + if (isTopnLazyMaterialize) { + Set materializedColumnNames = topnLazyMaterializeOutputColumns.stream() + .map(Column::getName).collect(Collectors.toSet()); + olapTable.getColumnDesc(selectedIndexId, columnsDesc, keyColumnNames, keyColumnTypes, + materializedColumnNames); + TColumn tColumn = globalRowIdColumn.toThrift(); tColumn.setColumnType(ScalarType.createStringType().toColumnTypeThrift()); tColumn.setAggregationType(AggregateType.REPLACE.toThrift()); tColumn.setIsKey(false); @@ -1527,8 +1530,25 @@ protected void toThrift(TPlanNode msg) { tColumn.setVisible(false); tColumn.setColUniqueId(Integer.MAX_VALUE); columnsDesc.add(tColumn); + } else { + olapTable.getColumnDesc(selectedIndexId, columnsDesc, keyColumnNames, keyColumnTypes); + + // Add extra row id column + ArrayList slots = desc.getSlots(); + Column lastColumn = slots.get(slots.size() - 1).getColumn(); + if (lastColumn != null && 
lastColumn.getName().equalsIgnoreCase(Column.ROWID_COL)) { + TColumn tColumn = new TColumn(); + tColumn.setColumnName(Column.ROWID_COL); + tColumn.setColumnType(ScalarType.createStringType().toColumnTypeThrift()); + tColumn.setAggregationType(AggregateType.REPLACE.toThrift()); + tColumn.setIsKey(false); + tColumn.setIsAllowNull(false); + // keep compatibility + tColumn.setVisible(false); + tColumn.setColUniqueId(Integer.MAX_VALUE); + columnsDesc.add(tColumn); + } } - for (Index index : olapTable.getIndexes()) { TOlapTableIndex tIndex = index.toThrift(index.getColumnUniqueIds(olapTable.getBaseSchema())); indexDesc.add(tIndex); @@ -1930,4 +1950,28 @@ public boolean pushDownAggNoGroupingCheckCol(FunctionCallExpr aggExpr, Column co public int getScanRangeNum() { return getScanTabletIds().size(); } + + public boolean isTopnLazyMaterialize() { + return isTopnLazyMaterialize; + } + + public void setIsTopnLazyMaterialize(boolean isTopnLazyMaterialize) { + this.isTopnLazyMaterialize = isTopnLazyMaterialize; + } + + public void addTopnLazyMaterializeOutputColumns(Column column) { + this.topnLazyMaterializeOutputColumns.add(column); + } + + public List getTopnLazyMaterializeOutputColumns() { + return topnLazyMaterializeOutputColumns; + } + + public Column getGlobalRowIdColumn() { + return globalRowIdColumn; + } + + public void setGlobalRowIdColumn(Column globalRowIdColumn) { + this.globalRowIdColumn = globalRowIdColumn; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilter.java b/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilter.java index ea0b954dc5d259..69ffb602ae601d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilter.java @@ -130,7 +130,9 @@ public static class RuntimeFilterTarget { public RuntimeFilterTarget(ScanNode targetNode, Expr targetExpr, boolean isBoundByKeyColumns, boolean isLocalTarget) { 
Preconditions.checkState(targetExpr.isBoundByTupleIds(targetNode.getTupleIds()) - || targetNode instanceof CTEScanNode); + || targetNode instanceof CTEScanNode, + "RuntimeFilter target " + targetExpr + " is not bounded: slotDesc" + + (targetExpr instanceof SlotRef ? ((SlotRef) targetExpr).getTupleId() : "null")); this.node = targetNode; this.expr = targetExpr; this.isBoundByKeyColumns = isBoundByKeyColumns; diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java index d53f36f2c1aa57..110c64d398a20d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java @@ -89,7 +89,7 @@ public abstract class ScanNode extends PlanNode implements SplitGenerator { private static final Logger LOG = LogManager.getLogger(ScanNode.class); protected static final int NUM_SPLITS_PER_PARTITION = 10; protected static final int NUM_SPLITTERS_ON_FLIGHT = Config.max_external_cache_loader_thread_pool_size; - protected final TupleDescriptor desc; + protected TupleDescriptor desc; // for distribution prunner protected Map columnFilters = new CaseInsensitiveMap(); // Use this if partition_prune_algorithm_version is 2.
@@ -870,4 +870,8 @@ public boolean isSerialOperator() { public boolean hasSerialScanChildren() { return isSerialOperator(); } + + public void setDesc(TupleDescriptor desc) { + this.desc = desc; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java index e5af0486396b24..4a014bd3834fb3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java @@ -28,6 +28,7 @@ import org.apache.doris.analysis.SlotRef; import org.apache.doris.analysis.SortInfo; import org.apache.doris.common.NotImplementedException; +import org.apache.doris.common.Pair; import org.apache.doris.common.UserException; import org.apache.doris.qe.ConnectContext; import org.apache.doris.statistics.StatisticalType; @@ -83,6 +84,9 @@ public class SortNode extends PlanNode { private ArrayList nullabilityChangedFlags = Lists.newArrayList(); + // topn filter target: ScanNode id + slot desc + private List> topnFilterTargets; + /** * Constructor. 
*/ @@ -215,7 +219,7 @@ public String getNodeExplainString(String detailPrefix, TExplainLevel detailLeve output.append("\n"); if (useTopnOpt) { - output.append(detailPrefix + "TOPN OPT\n"); + output.append(detailPrefix + "TOPN filter targets: ").append(topnFilterTargets).append("\n"); } if (useTwoPhaseReadOpt) { output.append(detailPrefix + "OPT TWO PHASE\n"); @@ -414,4 +418,13 @@ public void setOffset(long offset) { super.setOffset(offset); updateSortAlgorithm(); } + + public List> getTopnFilterTargets() { + return topnFilterTargets; + } + + public void setTopnFilterTargets( + List> topnFilterTargets) { + this.topnFilterTargets = topnFilterTargets; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 4238f074fa8134..1b8b10e4a00e91 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -1333,6 +1333,11 @@ public enum IgnoreSplitType { @VariableMgr.VarAttr(name = USE_RF_DEFAULT) public boolean useRuntimeFilterDefaultSize = false; + @VariableMgr.VarAttr(name = "enable_topn_lazy_materialization", needForward = true, + fuzzy = false, + varType = VariableAnnotation.EXPERIMENTAL) + public boolean enableTopnLazyMaterialization = true; + @VariableMgr.VarAttr(name = DISABLE_INVERTED_INDEX_V1_FOR_VARIANT, needForward = true) private boolean disableInvertedIndexV1ForVaraint = true; diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/postprocess/PushDownFilterThroughProjectTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/postprocess/PushDownFilterThroughProjectTest.java index d93b1111c19734..3af20769e9306e 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/postprocess/PushDownFilterThroughProjectTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/postprocess/PushDownFilterThroughProjectTest.java @@ -132,7 +132,7 @@ public void 
testNotPushFilterWithNonfoldable(@Injectable LogicalProperties place PhysicalOlapScan scan = new PhysicalOlapScan(RelationId.createGenerator().getNextId(), t1, qualifier, 0L, Collections.emptyList(), Collections.emptyList(), null, PreAggStatus.on(), ImmutableList.of(), Optional.empty(), t1Properties, - Optional.empty(), ImmutableList.of()); + Optional.empty(), new ArrayList<>()); Alias x = new Alias(a, "x"); List projList3 = Lists.newArrayList(x, b, c); PhysicalProject proj3 = new PhysicalProject(projList3, placeHolder, scan); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/postprocess/TopNRuntimeFilterTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/postprocess/TopNRuntimeFilterTest.java index 53edb726427fb9..261ac8a813ee21 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/postprocess/TopNRuntimeFilterTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/postprocess/TopNRuntimeFilterTest.java @@ -21,7 +21,6 @@ import org.apache.doris.nereids.processor.post.PlanPostProcessors; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.SortPhase; -import org.apache.doris.nereids.trees.plans.physical.PhysicalDeferMaterializeTopN; import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan; import org.apache.doris.nereids.trees.plans.physical.PhysicalTopN; import org.apache.doris.nereids.util.MemoPatternMatchSupported; @@ -44,9 +43,10 @@ public void testUseTopNRf() { .implement(); PhysicalPlan plan = checker.getPhysicalPlan(); plan = new PlanPostProcessors(checker.getCascadesContext()).process(plan); - Assertions.assertInstanceOf(PhysicalDeferMaterializeTopN.class, plan.children().get(0).child(0)); - PhysicalDeferMaterializeTopN localTopN - = (PhysicalDeferMaterializeTopN) plan.child(0).child(0); + Plan rfSource = plan.child(0).child(0).child(0).child(0); + Assertions.assertInstanceOf(PhysicalTopN.class, rfSource); + PhysicalTopN localTopN + = (PhysicalTopN) rfSource; 
Assertions.assertTrue(checker.getCascadesContext().getTopnFilterContext().isTopnFilterSource(localTopN)); } @@ -58,11 +58,9 @@ public void testUseTopNRfForComplexCase() { .implement(); PhysicalPlan plan = checker.getPhysicalPlan(); plan = new PlanPostProcessors(checker.getCascadesContext()).process(plan); - Assertions.assertInstanceOf(PhysicalTopN.class, plan.child(0).child(1).child(0)); - Assertions.assertEquals(SortPhase.LOCAL_SORT, ((PhysicalTopN) plan - .child(0).child(1).child(0)).getSortPhase()); - PhysicalTopN localTopN = (PhysicalTopN) plan - .child(0).child(1).child(0); + Plan rfSource = plan.child(0).child(1).child(0).child(0).child(0); + Assertions.assertEquals(SortPhase.LOCAL_SORT, ((PhysicalTopN) rfSource).getSortPhase()); + PhysicalTopN localTopN = (PhysicalTopN) rfSource; Assertions.assertTrue(checker.getCascadesContext().getTopnFilterContext().isTopnFilterSource(localTopN)); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/postprocess/TopnLazyMaterializeTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/postprocess/TopnLazyMaterializeTest.java new file mode 100644 index 00000000000000..c59367c9dd22f9 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/postprocess/TopnLazyMaterializeTest.java @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.postprocess; + +import org.apache.doris.nereids.datasets.ssb.SSBTestBase; +import org.apache.doris.nereids.glue.translator.PhysicalPlanTranslator; +import org.apache.doris.nereids.glue.translator.PlanTranslatorContext; +import org.apache.doris.nereids.processor.post.PlanPostProcessors; +import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan; +import org.apache.doris.nereids.util.PlanChecker; +import org.apache.doris.planner.PlanFragment; + +import org.junit.jupiter.api.Test; + +public class TopnLazyMaterializeTest extends SSBTestBase { + + @Override + public void runBeforeAll() throws Exception { + super.runBeforeAll(); + connectContext.getSessionVariable().setRuntimeFilterMode("Global"); + connectContext.getSessionVariable().setRuntimeFilterType(8); + connectContext.getSessionVariable().setEnableRuntimeFilterPrune(false); + connectContext.getSessionVariable().expandRuntimeFilterByInnerJoin = false; + } + + @Test + public void test() { + String sql = "select lineorder.*, dates.* from dates, lineorder where d_datekey > 19980101 and lo_orderdate = d_datekey order by d_date limit 10;"; + PlanChecker checker = PlanChecker.from(connectContext) + .analyze(sql) + .rewrite() + .implement(); + PhysicalPlan plan = checker.getPhysicalPlan(); + plan = new PlanPostProcessors(checker.getCascadesContext()).process(plan); + PlanFragment fragment = new PhysicalPlanTranslator(new PlanTranslatorContext(checker.getCascadesContext())).translatePlan(plan); + // MaterializationNode materializationNode = (MaterializationNode) fragment.getPlanRoot(); + System.out.println(fragment); + } +} diff --git a/gensrc/proto/internal_service.proto b/gensrc/proto/internal_service.proto index b5464eeafb3d02..f1136cc198c5d8 100644 --- a/gensrc/proto/internal_service.proto +++ b/gensrc/proto/internal_service.proto @@ -765,6 +765,42 @@ message 
PMultiGetResponse { repeated PRowLocation row_locs = 5; }; +// Each block has its own schema to read +message PRequestBlockDesc { + optional bool fetch_row_store = 1; + repeated PSlotDescriptor slots = 2; + repeated ColumnPB column_descs = 3; + repeated uint32 file_id = 4; + repeated uint32 row_id = 5; + optional PTupleDescriptor desc = 6; + repeated uint32 column_idxs = 7; +} + +message PMultiGetRequestV2 { + repeated PRequestBlockDesc request_block_descs = 1; + + // for compatibility + optional int32 be_exec_version = 2; + optional PUniqueId query_id = 3; + optional bool gc_id_map = 4; + optional uint64 wg_id = 5; +}; + +message PMultiGetBlockV2 { + optional PBlock block = 1; + // more efficient serialization fields for row store + enum RowFormat { + JSONB = 0; + }; + optional RowFormat format = 2; + repeated bytes binary_row_data = 3; +} + +message PMultiGetResponseV2 { + optional PStatus status = 1; + repeated PMultiGetBlockV2 blocks = 2; +}; + message PFetchColIdsRequest { message PFetchColIdParam { required int64 indexId = 1; @@ -1035,6 +1071,7 @@ service PBackendService { rpc outfile_write_success(POutfileWriteSuccessRequest) returns (POutfileWriteSuccessResult); rpc fetch_table_schema(PFetchTableSchemaRequest) returns (PFetchTableSchemaResult); rpc multiget_data(PMultiGetRequest) returns (PMultiGetResponse); + rpc multiget_data_v2(PMultiGetRequestV2) returns (PMultiGetResponseV2); rpc get_file_cache_meta_by_tablet_id(PGetFileCacheMetaRequest) returns (PGetFileCacheMetaResponse); rpc tablet_fetch_data(PTabletKeyLookupRequest) returns (PTabletKeyLookupResponse); rpc get_column_ids_by_tablet_ids(PFetchColIdsRequest) returns (PFetchColIdsResponse); diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index dfca38aa27cac8..f4b9746a31978e 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -58,7 +58,8 @@ enum TPlanNodeType { JDBC_SCAN_NODE, TEST_EXTERNAL_SCAN_NODE, PARTITION_SORT_NODE, - GROUP_COMMIT_SCAN_NODE +
GROUP_COMMIT_SCAN_NODE, + MATERIALIZATION_NODE } struct TKeyRange { @@ -976,6 +977,27 @@ struct TRepeatNode { 6: required list exprs } +struct TMaterializationNode { + // Materialization node output tuple + 1: optional Types.TTupleId tuple_id + // Intermediate materialization tuple + 2: optional Types.TTupleId intermediate_tuple_id + // Nodes in this cluster, used for second phase fetch + 3: optional Descriptors.TPaloNodesInfo nodes_info + // Separate list of expr for fetch data + 4: optional list fetch_expr_lists + // Fetch schema + 5: optional list> column_descs_lists; + // Add column in tuple offset + 6: optional list> slot_locs_lists; // [[1, 2], [4, 5]] + // Whether fetch row store + 7: optional list fetch_row_stores + // Whether to clear id map + 8: optional bool gc_id_map + // Similar to slot_locs_lists, but each entry is the slot's position (column index) in the table + 9: optional list> column_idxs_lists; +} + struct TPreAggregationNode { 1: required list group_exprs 2: required list aggregate_exprs @@ -1368,6 +1390,7 @@ struct TPlanNode { // Runtime filters assigned to this plan node, exist in HashJoinNode and ScanNode 36: optional list runtime_filters 37: optional TGroupCommitScanNode group_commit_scan_node + 38: optional TMaterializationNode materialization_node // Use in vec exec engine 40: optional Exprs.TExpr vconjunct diff --git a/regression-test/data/external_table_p0/hive/test_hive_rename_column_orc_parquet.out b/regression-test/data/external_table_p0/hive/test_hive_rename_column_orc_parquet.out index fa260b9622133e..918710fdbbd831 100644 --- a/regression-test/data/external_table_p0/hive/test_hive_rename_column_orc_parquet.out +++ b/regression-test/data/external_table_p0/hive/test_hive_rename_column_orc_parquet.out @@ -433,3 +433,229 @@ false 60 yyy 100 yyyyyy true 70 hahaha 8888 abcd false 80 cmake 9999 efg +-- !rename_orc_1_true_limit -- +\N \N hello world \N \N +\N \N keep \N \N +true 30 abcd \N \N + +-- !rename_orc_2_true_limit -- +\N +\N +30 + +-- !rename_orc_3_true_limit -- +\N 2 +30 1
+40 1 + +-- !rename_orc_4_true_limit -- +true 30 abcd \N \N +true 50 xxx \N cols +true 70 hahaha 8888 abcd + +-- !rename_orc_5_true_limit -- +true 70 hahaha 8888 abcd +false 80 cmake 9999 efg + +-- !rename_orc_6_true_limit -- +\N \N hello world \N \N +\N \N keep \N \N +true 30 abcd \N \N + +-- !rename_orc_7_true_limit -- +true 30 abcd \N \N + +-- !rename_orc_8_true_limit -- +true +true +true + +-- !rename_orc_9_true_limit -- + +-- !rename_orc_10_true_limit -- + +-- !rename_orc_11_true_limit -- +\N \N +\N \N +30 true + +-- !rename_orc_12_true_limit -- +\N \N hello world \N \N +\N \N keep \N \N +\N \N abcd 30 true + +-- !rename_orc_13_true_limit -- +false 40 new adcd \N \N +true 50 xxx \N cols +false 60 yyy \N yyyyyy + +-- !rename_orc_1_false_limit -- +true 10 hello world \N \N +false 20 keep \N \N +true 30 abcd \N \N + +-- !rename_orc_2_false_limit -- +10 +20 +30 + +-- !rename_orc_3_false_limit -- +10 1 +20 1 +30 1 + +-- !rename_orc_4_false_limit -- +true 10 hello world \N \N +true 30 abcd \N \N +true 50 xxx 60 cols + +-- !rename_orc_5_false_limit -- +true 50 xxx 60 cols +false 60 yyy 100 yyyyyy +true 70 hahaha 8888 abcd + +-- !rename_orc_6_false_limit -- +true 10 hello world \N \N +false 20 keep \N \N +true 30 abcd \N \N + +-- !rename_orc_7_false_limit -- +true 30 abcd \N \N + +-- !rename_orc_8_false_limit -- +true +true +true + +-- !rename_orc_9_false_limit -- + +-- !rename_orc_10_false_limit -- + +-- !rename_orc_11_false_limit -- +10 true +20 false +30 true + +-- !rename_orc_12_false_limit -- +\N \N hello world 10 true +\N \N keep 20 false +\N \N abcd 30 true + +-- !rename_orc_13_false_limit -- +true 10 hello world \N \N +false 20 keep \N \N +false 40 new adcd \N \N + +-- !rename_parquet_1_true_limit -- +\N \N hello world \N \N +\N \N keep \N \N +true 30 abcd \N \N + +-- !rename_parquet_2_true_limit -- +\N +\N +30 + +-- !rename_parquet_3_true_limit -- +\N 2 +30 1 +40 1 + +-- !rename_parquet_4_true_limit -- +true 30 abcd \N \N +true 50 xxx \N cols +true 70 hahaha 
8888 abcd + +-- !rename_parquet_5_true_limit -- +true 70 hahaha 8888 abcd +false 80 cmake 9999 efg + +-- !rename_parquet_6_true_limit -- +\N \N hello world \N \N +\N \N keep \N \N +true 30 abcd \N \N + +-- !rename_parquet_7_true_limit -- +true 30 abcd \N \N + +-- !rename_parquet_8_true_limit -- +true +true +true + +-- !rename_parquet_9_true_limit -- + +-- !rename_parquet_10_true_limit -- + +-- !rename_parquet_11_true_limit -- +\N \N +\N \N +30 true + +-- !rename_parquet_12_true_limit -- +\N \N hello world \N \N +\N \N keep \N \N +\N \N abcd 30 true + +-- !rename_parquet_13_true_limit -- +false 40 new adcd \N \N +true 50 xxx \N cols +false 60 yyy \N yyyyyy + +-- !rename_parquet_1_false_limit -- +true 10 hello world \N \N +false 20 keep \N \N +true 30 abcd \N \N + +-- !rename_parquet_2_false_limit -- +10 +20 +30 + +-- !rename_parquet_3_false_limit -- +10 1 +20 1 +30 1 + +-- !rename_parquet_4_false_limit -- +true 10 hello world \N \N +true 30 abcd \N \N +true 50 xxx 60 cols + +-- !rename_parquet_5_false_limit -- +true 50 xxx 60 cols +false 60 yyy 100 yyyyyy +true 70 hahaha 8888 abcd + +-- !rename_parquet_6_false_limit -- +true 10 hello world \N \N +false 20 keep \N \N +true 30 abcd \N \N + +-- !rename_parquet_7_false_limit -- +true 30 abcd \N \N + +-- !rename_parquet_8_false_limit -- +true +true +true + +-- !rename_parquet_9_false_limit -- + +-- !rename_parquet_10_false_limit -- + +-- !rename_parquet_11_false_limit -- +10 true +20 false +30 true + +-- !rename_parquet_12_false_limit -- +\N \N hello world 10 true +\N \N keep 20 false +\N \N abcd 30 true + +-- !rename_parquet_13_false_limit -- +true 10 hello world \N \N +false 20 keep \N \N +false 40 new adcd \N \N + diff --git a/regression-test/data/external_table_p0/hive/test_hive_topn_lazy_mat.out b/regression-test/data/external_table_p0/hive/test_hive_topn_lazy_mat.out new file mode 100644 index 00000000000000..d90748b636b757 --- /dev/null +++ b/regression-test/data/external_table_p0/hive/test_hive_topn_lazy_mat.out 
@@ -0,0 +1,1193 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !1 -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 +4 user4 4.0 true 2.0 1 +5 user5 5.0 false 2.5 1 +6 user6 6.0 true 3.0 1 +7 user7 7.0 false 3.5 1 +8 user8 8.0 true 4.0 1 +9 user9 9.0 false 4.5 1 +10 user10 10.0 true 5.0 2 + +-- !2 -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 +4 user4 4.0 true 2.0 1 +5 user5 5.0 false 2.5 1 +6 user6 6.0 true 3.0 1 +7 user7 7.0 false 3.5 1 +8 user8 8.0 true 4.0 1 +9 user9 9.0 false 4.5 1 +10 user10 10.0 true 5.0 2 + +-- !3 -- +0.5 1.0 false user1 +1.0 2.0 true user2 +1.5 3.0 false user3 +2.0 4.0 true user4 +2.5 5.0 false user5 +3.0 6.0 true user6 +3.5 7.0 false user7 +4.0 8.0 true user8 +4.5 9.0 false user9 +5.0 10.0 true user10 + +-- !4 -- +1.0 user1 1 1 +10.0 user10 10 2 +11.0 user11 11 2 +12.0 user12 12 2 +13.0 user13 13 2 +14.0 user14 14 2 +15.0 user15 15 2 +16.0 user16 16 2 +17.0 user17 17 2 +18.0 user18 18 2 + +-- !test_basic -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 + +-- !test_partial -- +2 user2 1.0 +4 user4 2.0 +6 user6 3.0 + +-- !test_multi_sort -- +1 user1 1.0 0.5 +2 user2 2.0 1.0 +3 user3 3.0 1.5 + +-- !test_filter -- + +-- !test_subquery -- +2 user2 3.0 +4 user4 6.0 +6 user6 9.0 + +-- !test_agg -- +1 0.5 1.0 +2 1.0 2.0 +3 1.5 3.0 + +-- !test_basic -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 +4 user4 4.0 true 2.0 1 +5 user5 5.0 false 2.5 1 +6 user6 6.0 true 3.0 1 +7 user7 7.0 false 3.5 1 +8 user8 8.0 true 4.0 1 + +-- !test_partial -- +2 user2 1.0 +4 user4 2.0 +6 user6 3.0 +8 user8 4.0 +10 user10 5.0 +12 user12 6.0 +14 user14 7.0 +16 user16 8.0 + +-- !test_multi_sort -- +1 user1 1.0 0.5 +2 user2 2.0 1.0 +3 user3 3.0 1.5 +4 user4 4.0 2.0 +5 user5 5.0 2.5 +6 user6 6.0 3.0 +7 user7 7.0 3.5 +8 user8 8.0 4.0 + +-- !test_filter -- + +-- !test_subquery -- +2 user2 3.0 +4 user4 6.0 
+6 user6 9.0 +8 user8 12.0 +10 user10 15.0 +12 user12 18.0 +14 user14 21.0 +16 user16 24.0 + +-- !test_agg -- +1 0.5 1.0 +2 1.0 2.0 +3 1.5 3.0 +4 2.0 4.0 +5 2.5 5.0 +6 3.0 6.0 +7 3.5 7.0 +8 4.0 8.0 + +-- !test_basic -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 +4 user4 4.0 true 2.0 1 +5 user5 5.0 false 2.5 1 +6 user6 6.0 true 3.0 1 +7 user7 7.0 false 3.5 1 +8 user8 8.0 true 4.0 1 +9 user9 9.0 false 4.5 1 +10 user10 10.0 true 5.0 2 +11 user11 11.0 false 5.5 2 +12 user12 12.0 true 6.0 2 +13 user13 13.0 false 6.5 2 +14 user14 14.0 true 7.0 2 +15 user15 15.0 false 7.5 2 +16 user16 16.0 true 8.0 2 +17 user17 17.0 false 8.5 2 +18 user18 18.0 true 9.0 2 +19 user19 19.0 false 9.5 2 +20 user20 20.0 true 10.0 2 +21 user21 21.0 false 10.5 2 +22 user22 22.0 true 11.0 2 +23 user23 23.0 false 11.5 2 +24 user24 24.0 true 12.0 2 +25 user25 25.0 false 12.5 2 +26 user26 26.0 true 13.0 2 +27 user27 27.0 false 13.5 2 + +-- !test_partial -- +2 user2 1.0 +4 user4 2.0 +6 user6 3.0 +8 user8 4.0 +10 user10 5.0 +12 user12 6.0 +14 user14 7.0 +16 user16 8.0 +18 user18 9.0 +20 user20 10.0 +22 user22 11.0 +24 user24 12.0 +26 user26 13.0 + +-- !test_multi_sort -- +1 user1 1.0 0.5 +2 user2 2.0 1.0 +3 user3 3.0 1.5 +4 user4 4.0 2.0 +5 user5 5.0 2.5 +6 user6 6.0 3.0 +7 user7 7.0 3.5 +8 user8 8.0 4.0 +9 user9 9.0 4.5 +10 user10 10.0 5.0 +11 user11 11.0 5.5 +12 user12 12.0 6.0 +13 user13 13.0 6.5 +14 user14 14.0 7.0 +15 user15 15.0 7.5 +16 user16 16.0 8.0 +17 user17 17.0 8.5 +18 user18 18.0 9.0 +19 user19 19.0 9.5 +20 user20 20.0 10.0 +21 user21 21.0 10.5 +22 user22 22.0 11.0 +23 user23 23.0 11.5 +24 user24 24.0 12.0 +25 user25 25.0 12.5 +26 user26 26.0 13.0 +27 user27 27.0 13.5 + +-- !test_filter -- + +-- !test_subquery -- +2 user2 3.0 +4 user4 6.0 +6 user6 9.0 +8 user8 12.0 +10 user10 15.0 +12 user12 18.0 +14 user14 21.0 +16 user16 24.0 +18 user18 27.0 +20 user20 30.0 +22 user22 33.0 +24 user24 36.0 +26 user26 39.0 + +-- !test_agg -- +1 0.5 1.0 +2 1.0 2.0 +3 1.5 3.0 
+4 2.0 4.0 +5 2.5 5.0 +6 3.0 6.0 +7 3.5 7.0 +8 4.0 8.0 +9 4.5 9.0 +10 5.0 10.0 +11 5.5 11.0 +12 6.0 12.0 +13 6.5 13.0 +14 7.0 14.0 +15 7.5 15.0 +16 8.0 16.0 +17 8.5 17.0 +18 9.0 18.0 +19 9.5 19.0 +20 10.0 20.0 +21 10.5 21.0 +22 11.0 22.0 +23 11.5 23.0 +24 12.0 24.0 +25 12.5 25.0 +26 13.0 26.0 +27 13.5 27.0 + +-- !1 -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 +4 user4 4.0 true 2.0 1 +5 user5 5.0 false 2.5 1 +6 user6 6.0 true 3.0 1 +7 user7 7.0 false 3.5 1 +8 user8 8.0 true 4.0 1 +9 user9 9.0 false 4.5 1 +10 user10 10.0 true 5.0 2 + +-- !2 -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 +4 user4 4.0 true 2.0 1 +5 user5 5.0 false 2.5 1 +6 user6 6.0 true 3.0 1 +7 user7 7.0 false 3.5 1 +8 user8 8.0 true 4.0 1 +9 user9 9.0 false 4.5 1 +10 user10 10.0 true 5.0 2 + +-- !3 -- +0.5 1.0 false user1 +1.0 2.0 true user2 +1.5 3.0 false user3 +2.0 4.0 true user4 +2.5 5.0 false user5 +3.0 6.0 true user6 +3.5 7.0 false user7 +4.0 8.0 true user8 +4.5 9.0 false user9 +5.0 10.0 true user10 + +-- !4 -- +1.0 user1 1 1 +10.0 user10 10 2 +11.0 user11 11 2 +12.0 user12 12 2 +13.0 user13 13 2 +14.0 user14 14 2 +15.0 user15 15 2 +16.0 user16 16 2 +17.0 user17 17 2 +18.0 user18 18 2 + +-- !test_basic -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 + +-- !test_partial -- +2 user2 1.0 +4 user4 2.0 +6 user6 3.0 + +-- !test_multi_sort -- +1 user1 1.0 0.5 +2 user2 2.0 1.0 +3 user3 3.0 1.5 + +-- !test_filter -- + +-- !test_subquery -- +2 user2 3.0 +4 user4 6.0 +6 user6 9.0 + +-- !test_agg -- +1 0.5 1.0 +2 1.0 2.0 +3 1.5 3.0 + +-- !test_basic -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 +4 user4 4.0 true 2.0 1 +5 user5 5.0 false 2.5 1 +6 user6 6.0 true 3.0 1 +7 user7 7.0 false 3.5 1 +8 user8 8.0 true 4.0 1 + +-- !test_partial -- +2 user2 1.0 +4 user4 2.0 +6 user6 3.0 +8 user8 4.0 +10 user10 5.0 +12 user12 6.0 +14 user14 7.0 +16 user16 8.0 + +-- !test_multi_sort -- +1 user1 1.0 
0.5 +2 user2 2.0 1.0 +3 user3 3.0 1.5 +4 user4 4.0 2.0 +5 user5 5.0 2.5 +6 user6 6.0 3.0 +7 user7 7.0 3.5 +8 user8 8.0 4.0 + +-- !test_filter -- + +-- !test_subquery -- +2 user2 3.0 +4 user4 6.0 +6 user6 9.0 +8 user8 12.0 +10 user10 15.0 +12 user12 18.0 +14 user14 21.0 +16 user16 24.0 + +-- !test_agg -- +1 0.5 1.0 +2 1.0 2.0 +3 1.5 3.0 +4 2.0 4.0 +5 2.5 5.0 +6 3.0 6.0 +7 3.5 7.0 +8 4.0 8.0 + +-- !test_basic -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 +4 user4 4.0 true 2.0 1 +5 user5 5.0 false 2.5 1 +6 user6 6.0 true 3.0 1 +7 user7 7.0 false 3.5 1 +8 user8 8.0 true 4.0 1 +9 user9 9.0 false 4.5 1 +10 user10 10.0 true 5.0 2 +11 user11 11.0 false 5.5 2 +12 user12 12.0 true 6.0 2 +13 user13 13.0 false 6.5 2 +14 user14 14.0 true 7.0 2 +15 user15 15.0 false 7.5 2 +16 user16 16.0 true 8.0 2 +17 user17 17.0 false 8.5 2 +18 user18 18.0 true 9.0 2 +19 user19 19.0 false 9.5 1 +20 user20 20.0 true 10.0 1 +21 user21 21.0 false 10.5 1 +22 user22 22.0 true 11.0 1 +23 user23 23.0 false 11.5 1 +24 user24 24.0 true 12.0 1 +25 user25 25.0 false 12.5 1 +26 user26 26.0 true 13.0 1 +27 user27 27.0 false 13.5 1 + +-- !test_partial -- +2 user2 1.0 +4 user4 2.0 +6 user6 3.0 +8 user8 4.0 +10 user10 5.0 +12 user12 6.0 +14 user14 7.0 +16 user16 8.0 +18 user18 9.0 +20 user20 10.0 +22 user22 11.0 +24 user24 12.0 +26 user26 13.0 + +-- !test_multi_sort -- +1 user1 1.0 0.5 +2 user2 2.0 1.0 +3 user3 3.0 1.5 +4 user4 4.0 2.0 +5 user5 5.0 2.5 +6 user6 6.0 3.0 +7 user7 7.0 3.5 +8 user8 8.0 4.0 +9 user9 9.0 4.5 +10 user10 10.0 5.0 +11 user11 11.0 5.5 +12 user12 12.0 6.0 +13 user13 13.0 6.5 +14 user14 14.0 7.0 +15 user15 15.0 7.5 +16 user16 16.0 8.0 +17 user17 17.0 8.5 +18 user18 18.0 9.0 +19 user19 19.0 9.5 +20 user20 20.0 10.0 +21 user21 21.0 10.5 +22 user22 22.0 11.0 +23 user23 23.0 11.5 +24 user24 24.0 12.0 +25 user25 25.0 12.5 +26 user26 26.0 13.0 +27 user27 27.0 13.5 + +-- !test_filter -- + +-- !test_subquery -- +2 user2 3.0 +4 user4 6.0 +6 user6 9.0 +8 user8 12.0 
+10 user10 15.0 +12 user12 18.0 +14 user14 21.0 +16 user16 24.0 +18 user18 27.0 +20 user20 30.0 +22 user22 33.0 +24 user24 36.0 +26 user26 39.0 + +-- !test_agg -- +1 0.5 1.0 +2 1.0 2.0 +3 1.5 3.0 +4 2.0 4.0 +5 2.5 5.0 +6 3.0 6.0 +7 3.5 7.0 +8 4.0 8.0 +9 4.5 9.0 +10 5.0 10.0 +11 5.5 11.0 +12 6.0 12.0 +13 6.5 13.0 +14 7.0 14.0 +15 7.5 15.0 +16 8.0 16.0 +17 8.5 17.0 +18 9.0 18.0 +19 9.5 19.0 +20 10.0 20.0 +21 10.5 21.0 +22 11.0 22.0 +23 11.5 23.0 +24 12.0 24.0 +25 12.5 25.0 +26 13.0 26.0 +27 13.5 27.0 + +-- !test_join1 -- +2 user2 2.0 1.0 +4 user4 4.0 2.0 +6 user6 6.0 3.0 + +-- !test_join2 -- +1 user1 1.0 0.5 +2 user2 2.0 1.0 +3 user3 3.0 1.5 + +-- !test_complex -- +2 user2 2.0 1.0 2.0 +4 user4 4.0 2.0 4.0 +6 user6 6.0 3.0 6.0 + +-- !test_join1 -- +2 user2 2.0 1.0 +4 user4 4.0 2.0 +6 user6 6.0 3.0 +8 user8 8.0 4.0 +10 user10 10.0 5.0 +12 user12 12.0 6.0 +14 user14 14.0 7.0 +16 user16 16.0 8.0 + +-- !test_join2 -- +1 user1 1.0 0.5 +2 user2 2.0 1.0 +3 user3 3.0 1.5 +4 user4 4.0 2.0 +5 user5 5.0 2.5 +6 user6 6.0 3.0 +7 user7 7.0 3.5 +8 user8 8.0 4.0 + +-- !test_complex -- +2 user2 2.0 1.0 2.0 +4 user4 4.0 2.0 4.0 +6 user6 6.0 3.0 6.0 +8 user8 8.0 4.0 8.0 +10 user10 10.0 5.0 10.0 +12 user12 12.0 6.0 12.0 +14 user14 14.0 7.0 14.0 +16 user16 16.0 8.0 16.0 + +-- !test_join1 -- +2 user2 2.0 1.0 +4 user4 4.0 2.0 +6 user6 6.0 3.0 +8 user8 8.0 4.0 +10 user10 10.0 5.0 +12 user12 12.0 6.0 +14 user14 14.0 7.0 +16 user16 16.0 8.0 +18 user18 18.0 9.0 +20 user20 20.0 10.0 +22 user22 22.0 11.0 +24 user24 24.0 12.0 +26 user26 26.0 13.0 + +-- !test_join2 -- +1 user1 1.0 0.5 +2 user2 2.0 1.0 +3 user3 3.0 1.5 +4 user4 4.0 2.0 +5 user5 5.0 2.5 +6 user6 6.0 3.0 +7 user7 7.0 3.5 +8 user8 8.0 4.0 +9 user9 9.0 4.5 +10 user10 10.0 5.0 +11 user11 11.0 5.5 +12 user12 12.0 6.0 +13 user13 13.0 6.5 +14 user14 14.0 7.0 +15 user15 15.0 7.5 +16 user16 16.0 8.0 +17 user17 17.0 8.5 +18 user18 18.0 9.0 +19 user19 19.0 \N +20 user20 20.0 \N +21 user21 21.0 \N +22 user22 22.0 \N +23 user23 23.0 \N +24 user24 
24.0 \N +25 user25 25.0 \N +26 user26 26.0 \N +27 user27 27.0 \N + +-- !test_complex -- +2 user2 2.0 1.0 2.0 +4 user4 4.0 2.0 4.0 +6 user6 6.0 3.0 6.0 +8 user8 8.0 4.0 8.0 +10 user10 10.0 5.0 10.0 +12 user12 12.0 6.0 12.0 +14 user14 14.0 7.0 14.0 +16 user16 16.0 8.0 16.0 +18 user18 18.0 9.0 18.0 +20 user20 20.0 10.0 20.0 +22 user22 22.0 11.0 22.0 +24 user24 24.0 12.0 24.0 +26 user26 26.0 13.0 26.0 + +-- !1 -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 +4 user4 4.0 true 2.0 1 +5 user5 5.0 false 2.5 1 +6 user6 6.0 true 3.0 1 +7 user7 7.0 false 3.5 1 +8 user8 8.0 true 4.0 1 +9 user9 9.0 false 4.5 1 +10 user10 10.0 true 5.0 2 + +-- !2 -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 +4 user4 4.0 true 2.0 1 +5 user5 5.0 false 2.5 1 +6 user6 6.0 true 3.0 1 +7 user7 7.0 false 3.5 1 +8 user8 8.0 true 4.0 1 +9 user9 9.0 false 4.5 1 +10 user10 10.0 true 5.0 2 + +-- !3 -- +0.5 1.0 false user1 +1.0 2.0 true user2 +1.5 3.0 false user3 +2.0 4.0 true user4 +2.5 5.0 false user5 +3.0 6.0 true user6 +3.5 7.0 false user7 +4.0 8.0 true user8 +4.5 9.0 false user9 +5.0 10.0 true user10 + +-- !4 -- +1.0 user1 1 1 +10.0 user10 10 2 +11.0 user11 11 2 +12.0 user12 12 2 +13.0 user13 13 2 +14.0 user14 14 2 +15.0 user15 15 2 +16.0 user16 16 2 +17.0 user17 17 2 +18.0 user18 18 2 + +-- !test_basic -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 + +-- !test_partial -- +2 user2 1.0 +4 user4 2.0 +6 user6 3.0 + +-- !test_multi_sort -- +1 user1 1.0 0.5 +2 user2 2.0 1.0 +3 user3 3.0 1.5 + +-- !test_filter -- + +-- !test_subquery -- +2 user2 3.0 +4 user4 6.0 +6 user6 9.0 + +-- !test_agg -- +1 0.5 1.0 +2 1.0 2.0 +3 1.5 3.0 + +-- !test_basic -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 +4 user4 4.0 true 2.0 1 +5 user5 5.0 false 2.5 1 +6 user6 6.0 true 3.0 1 +7 user7 7.0 false 3.5 1 +8 user8 8.0 true 4.0 1 + +-- !test_partial -- +2 user2 1.0 +4 user4 2.0 +6 user6 3.0 +8 user8 4.0 +10 
user10 5.0 +12 user12 6.0 +14 user14 7.0 +16 user16 8.0 + +-- !test_multi_sort -- +1 user1 1.0 0.5 +2 user2 2.0 1.0 +3 user3 3.0 1.5 +4 user4 4.0 2.0 +5 user5 5.0 2.5 +6 user6 6.0 3.0 +7 user7 7.0 3.5 +8 user8 8.0 4.0 + +-- !test_filter -- + +-- !test_subquery -- +2 user2 3.0 +4 user4 6.0 +6 user6 9.0 +8 user8 12.0 +10 user10 15.0 +12 user12 18.0 +14 user14 21.0 +16 user16 24.0 + +-- !test_agg -- +1 0.5 1.0 +2 1.0 2.0 +3 1.5 3.0 +4 2.0 4.0 +5 2.5 5.0 +6 3.0 6.0 +7 3.5 7.0 +8 4.0 8.0 + +-- !test_basic -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 +4 user4 4.0 true 2.0 1 +5 user5 5.0 false 2.5 1 +6 user6 6.0 true 3.0 1 +7 user7 7.0 false 3.5 1 +8 user8 8.0 true 4.0 1 +9 user9 9.0 false 4.5 1 +10 user10 10.0 true 5.0 2 +11 user11 11.0 false 5.5 2 +12 user12 12.0 true 6.0 2 +13 user13 13.0 false 6.5 2 +14 user14 14.0 true 7.0 2 +15 user15 15.0 false 7.5 2 +16 user16 16.0 true 8.0 2 +17 user17 17.0 false 8.5 2 +18 user18 18.0 true 9.0 2 +19 user19 19.0 false 9.5 2 +20 user20 20.0 true 10.0 2 +21 user21 21.0 false 10.5 2 +22 user22 22.0 true 11.0 2 +23 user23 23.0 false 11.5 2 +24 user24 24.0 true 12.0 2 +25 user25 25.0 false 12.5 2 +26 user26 26.0 true 13.0 2 +27 user27 27.0 false 13.5 2 + +-- !test_partial -- +2 user2 1.0 +4 user4 2.0 +6 user6 3.0 +8 user8 4.0 +10 user10 5.0 +12 user12 6.0 +14 user14 7.0 +16 user16 8.0 +18 user18 9.0 +20 user20 10.0 +22 user22 11.0 +24 user24 12.0 +26 user26 13.0 + +-- !test_multi_sort -- +1 user1 1.0 0.5 +2 user2 2.0 1.0 +3 user3 3.0 1.5 +4 user4 4.0 2.0 +5 user5 5.0 2.5 +6 user6 6.0 3.0 +7 user7 7.0 3.5 +8 user8 8.0 4.0 +9 user9 9.0 4.5 +10 user10 10.0 5.0 +11 user11 11.0 5.5 +12 user12 12.0 6.0 +13 user13 13.0 6.5 +14 user14 14.0 7.0 +15 user15 15.0 7.5 +16 user16 16.0 8.0 +17 user17 17.0 8.5 +18 user18 18.0 9.0 +19 user19 19.0 9.5 +20 user20 20.0 10.0 +21 user21 21.0 10.5 +22 user22 22.0 11.0 +23 user23 23.0 11.5 +24 user24 24.0 12.0 +25 user25 25.0 12.5 +26 user26 26.0 13.0 +27 user27 27.0 13.5 + +-- 
!test_filter -- + +-- !test_subquery -- +2 user2 3.0 +4 user4 6.0 +6 user6 9.0 +8 user8 12.0 +10 user10 15.0 +12 user12 18.0 +14 user14 21.0 +16 user16 24.0 +18 user18 27.0 +20 user20 30.0 +22 user22 33.0 +24 user24 36.0 +26 user26 39.0 + +-- !test_agg -- +1 0.5 1.0 +2 1.0 2.0 +3 1.5 3.0 +4 2.0 4.0 +5 2.5 5.0 +6 3.0 6.0 +7 3.5 7.0 +8 4.0 8.0 +9 4.5 9.0 +10 5.0 10.0 +11 5.5 11.0 +12 6.0 12.0 +13 6.5 13.0 +14 7.0 14.0 +15 7.5 15.0 +16 8.0 16.0 +17 8.5 17.0 +18 9.0 18.0 +19 9.5 19.0 +20 10.0 20.0 +21 10.5 21.0 +22 11.0 22.0 +23 11.5 23.0 +24 12.0 24.0 +25 12.5 25.0 +26 13.0 26.0 +27 13.5 27.0 + +-- !1 -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 +4 user4 4.0 true 2.0 1 +5 user5 5.0 false 2.5 1 +6 user6 6.0 true 3.0 1 +7 user7 7.0 false 3.5 1 +8 user8 8.0 true 4.0 1 +9 user9 9.0 false 4.5 1 +10 user10 10.0 true 5.0 2 + +-- !2 -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 +4 user4 4.0 true 2.0 1 +5 user5 5.0 false 2.5 1 +6 user6 6.0 true 3.0 1 +7 user7 7.0 false 3.5 1 +8 user8 8.0 true 4.0 1 +9 user9 9.0 false 4.5 1 +10 user10 10.0 true 5.0 2 + +-- !3 -- +0.5 1.0 false user1 +1.0 2.0 true user2 +1.5 3.0 false user3 +2.0 4.0 true user4 +2.5 5.0 false user5 +3.0 6.0 true user6 +3.5 7.0 false user7 +4.0 8.0 true user8 +4.5 9.0 false user9 +5.0 10.0 true user10 + +-- !4 -- +1.0 user1 1 1 +10.0 user10 10 2 +11.0 user11 11 2 +12.0 user12 12 2 +13.0 user13 13 2 +14.0 user14 14 2 +15.0 user15 15 2 +16.0 user16 16 2 +17.0 user17 17 2 +18.0 user18 18 2 + +-- !test_basic -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 + +-- !test_partial -- +2 user2 1.0 +4 user4 2.0 +6 user6 3.0 + +-- !test_multi_sort -- +1 user1 1.0 0.5 +2 user2 2.0 1.0 +3 user3 3.0 1.5 + +-- !test_filter -- + +-- !test_subquery -- +2 user2 3.0 +4 user4 6.0 +6 user6 9.0 + +-- !test_agg -- +1 0.5 1.0 +2 1.0 2.0 +3 1.5 3.0 + +-- !test_basic -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 +4 user4 
4.0 true 2.0 1 +5 user5 5.0 false 2.5 1 +6 user6 6.0 true 3.0 1 +7 user7 7.0 false 3.5 1 +8 user8 8.0 true 4.0 1 + +-- !test_partial -- +2 user2 1.0 +4 user4 2.0 +6 user6 3.0 +8 user8 4.0 +10 user10 5.0 +12 user12 6.0 +14 user14 7.0 +16 user16 8.0 + +-- !test_multi_sort -- +1 user1 1.0 0.5 +2 user2 2.0 1.0 +3 user3 3.0 1.5 +4 user4 4.0 2.0 +5 user5 5.0 2.5 +6 user6 6.0 3.0 +7 user7 7.0 3.5 +8 user8 8.0 4.0 + +-- !test_filter -- + +-- !test_subquery -- +2 user2 3.0 +4 user4 6.0 +6 user6 9.0 +8 user8 12.0 +10 user10 15.0 +12 user12 18.0 +14 user14 21.0 +16 user16 24.0 + +-- !test_agg -- +1 0.5 1.0 +2 1.0 2.0 +3 1.5 3.0 +4 2.0 4.0 +5 2.5 5.0 +6 3.0 6.0 +7 3.5 7.0 +8 4.0 8.0 + +-- !test_basic -- +1 user1 1.0 false 0.5 1 +2 user2 2.0 true 1.0 1 +3 user3 3.0 false 1.5 1 +4 user4 4.0 true 2.0 1 +5 user5 5.0 false 2.5 1 +6 user6 6.0 true 3.0 1 +7 user7 7.0 false 3.5 1 +8 user8 8.0 true 4.0 1 +9 user9 9.0 false 4.5 1 +10 user10 10.0 true 5.0 2 +11 user11 11.0 false 5.5 2 +12 user12 12.0 true 6.0 2 +13 user13 13.0 false 6.5 2 +14 user14 14.0 true 7.0 2 +15 user15 15.0 false 7.5 2 +16 user16 16.0 true 8.0 2 +17 user17 17.0 false 8.5 2 +18 user18 18.0 true 9.0 2 +19 user19 19.0 false 9.5 1 +20 user20 20.0 true 10.0 1 +21 user21 21.0 false 10.5 1 +22 user22 22.0 true 11.0 1 +23 user23 23.0 false 11.5 1 +24 user24 24.0 true 12.0 1 +25 user25 25.0 false 12.5 1 +26 user26 26.0 true 13.0 1 +27 user27 27.0 false 13.5 1 + +-- !test_partial -- +2 user2 1.0 +4 user4 2.0 +6 user6 3.0 +8 user8 4.0 +10 user10 5.0 +12 user12 6.0 +14 user14 7.0 +16 user16 8.0 +18 user18 9.0 +20 user20 10.0 +22 user22 11.0 +24 user24 12.0 +26 user26 13.0 + +-- !test_multi_sort -- +1 user1 1.0 0.5 +2 user2 2.0 1.0 +3 user3 3.0 1.5 +4 user4 4.0 2.0 +5 user5 5.0 2.5 +6 user6 6.0 3.0 +7 user7 7.0 3.5 +8 user8 8.0 4.0 +9 user9 9.0 4.5 +10 user10 10.0 5.0 +11 user11 11.0 5.5 +12 user12 12.0 6.0 +13 user13 13.0 6.5 +14 user14 14.0 7.0 +15 user15 15.0 7.5 +16 user16 16.0 8.0 +17 user17 17.0 8.5 +18 user18 18.0 9.0 
+19 user19 19.0 9.5 +20 user20 20.0 10.0 +21 user21 21.0 10.5 +22 user22 22.0 11.0 +23 user23 23.0 11.5 +24 user24 24.0 12.0 +25 user25 25.0 12.5 +26 user26 26.0 13.0 +27 user27 27.0 13.5 + +-- !test_filter -- + +-- !test_subquery -- +2 user2 3.0 +4 user4 6.0 +6 user6 9.0 +8 user8 12.0 +10 user10 15.0 +12 user12 18.0 +14 user14 21.0 +16 user16 24.0 +18 user18 27.0 +20 user20 30.0 +22 user22 33.0 +24 user24 36.0 +26 user26 39.0 + +-- !test_agg -- +1 0.5 1.0 +2 1.0 2.0 +3 1.5 3.0 +4 2.0 4.0 +5 2.5 5.0 +6 3.0 6.0 +7 3.5 7.0 +8 4.0 8.0 +9 4.5 9.0 +10 5.0 10.0 +11 5.5 11.0 +12 6.0 12.0 +13 6.5 13.0 +14 7.0 14.0 +15 7.5 15.0 +16 8.0 16.0 +17 8.5 17.0 +18 9.0 18.0 +19 9.5 19.0 +20 10.0 20.0 +21 10.5 21.0 +22 11.0 22.0 +23 11.5 23.0 +24 12.0 24.0 +25 12.5 25.0 +26 13.0 26.0 +27 13.5 27.0 + +-- !test_join1 -- +2 user2 2.0 1.0 +4 user4 4.0 2.0 +6 user6 6.0 3.0 + +-- !test_join2 -- +1 user1 1.0 0.5 +2 user2 2.0 1.0 +3 user3 3.0 1.5 + +-- !test_complex -- +2 user2 2.0 1.0 2.0 +4 user4 4.0 2.0 4.0 +6 user6 6.0 3.0 6.0 + +-- !test_join1 -- +2 user2 2.0 1.0 +4 user4 4.0 2.0 +6 user6 6.0 3.0 +8 user8 8.0 4.0 +10 user10 10.0 5.0 +12 user12 12.0 6.0 +14 user14 14.0 7.0 +16 user16 16.0 8.0 + +-- !test_join2 -- +1 user1 1.0 0.5 +2 user2 2.0 1.0 +3 user3 3.0 1.5 +4 user4 4.0 2.0 +5 user5 5.0 2.5 +6 user6 6.0 3.0 +7 user7 7.0 3.5 +8 user8 8.0 4.0 + +-- !test_complex -- +2 user2 2.0 1.0 2.0 +4 user4 4.0 2.0 4.0 +6 user6 6.0 3.0 6.0 +8 user8 8.0 4.0 8.0 +10 user10 10.0 5.0 10.0 +12 user12 12.0 6.0 12.0 +14 user14 14.0 7.0 14.0 +16 user16 16.0 8.0 16.0 + +-- !test_join1 -- +2 user2 2.0 1.0 +4 user4 4.0 2.0 +6 user6 6.0 3.0 +8 user8 8.0 4.0 +10 user10 10.0 5.0 +12 user12 12.0 6.0 +14 user14 14.0 7.0 +16 user16 16.0 8.0 +18 user18 18.0 9.0 +20 user20 20.0 10.0 +22 user22 22.0 11.0 +24 user24 24.0 12.0 +26 user26 26.0 13.0 + +-- !test_join2 -- +1 user1 1.0 0.5 +2 user2 2.0 1.0 +3 user3 3.0 1.5 +4 user4 4.0 2.0 +5 user5 5.0 2.5 +6 user6 6.0 3.0 +7 user7 7.0 3.5 +8 user8 8.0 4.0 +9 user9 9.0 
4.5 +10 user10 10.0 5.0 +11 user11 11.0 5.5 +12 user12 12.0 6.0 +13 user13 13.0 6.5 +14 user14 14.0 7.0 +15 user15 15.0 7.5 +16 user16 16.0 8.0 +17 user17 17.0 8.5 +18 user18 18.0 9.0 +19 user19 19.0 \N +20 user20 20.0 \N +21 user21 21.0 \N +22 user22 22.0 \N +23 user23 23.0 \N +24 user24 24.0 \N +25 user25 25.0 \N +26 user26 26.0 \N +27 user27 27.0 \N + +-- !test_complex -- +2 user2 2.0 1.0 2.0 +4 user4 4.0 2.0 4.0 +6 user6 6.0 3.0 6.0 +8 user8 8.0 4.0 8.0 +10 user10 10.0 5.0 10.0 +12 user12 12.0 6.0 12.0 +14 user14 14.0 7.0 14.0 +16 user16 16.0 8.0 16.0 +18 user18 18.0 9.0 18.0 +20 user20 20.0 10.0 20.0 +22 user22 22.0 11.0 22.0 +24 user24 24.0 12.0 24.0 +26 user26 26.0 13.0 26.0 + diff --git a/regression-test/data/external_table_p0/hive/test_transactional_hive.out b/regression-test/data/external_table_p0/hive/test_transactional_hive.out index 94e32a43db7f3b..d17ad0b7cd5124 100644 --- a/regression-test/data/external_table_p0/hive/test_transactional_hive.out +++ b/regression-test/data/external_table_p0/hive/test_transactional_hive.out @@ -122,6 +122,15 @@ F -- !16 -- 4 DD +-- !17 -- +1 A + +-- !18 -- +1 A + +-- !19 -- +4 DD + -- !count_1 -- 3 @@ -137,3 +146,18 @@ F -- !count_5 -- 3 +-- !q01_limit -- +1 A 20230101 +2 BB 20230101 +3 C 20230101 + +-- !q02_limit -- +A +BB +C + +-- !q03_limit -- +2 BB 20230101 + +-- !q04_limit -- + diff --git a/regression-test/data/query_p0/topn_lazy/topn_lazy.out b/regression-test/data/query_p0/topn_lazy/topn_lazy.out new file mode 100644 index 00000000000000..fb4a62359dd650 --- /dev/null +++ b/regression-test/data/query_p0/topn_lazy/topn_lazy.out @@ -0,0 +1,70 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !test_lazy1 -- +19920401 April 1, 1992 Thursday April 1992 199204 Apr1992 5 1 92 4 14 Spring 0 1 0 1 +19920410 April 10, 1992 Saturday April 1992 199204 Apr1992 7 10 101 4 15 Spring 1 1 0 0 +19920411 April 11, 1992 Sunday April 1992 199204 Apr1992 1 11 102 4 15 Spring 0 1 0 0 +19920412 April 12, 1992 Monday April 1992 199204 Apr1992 2 12 103 4 15 Spring 0 1 0 1 +19920413 April 13, 1992 Tuesday April 1992 199204 Apr1992 3 13 104 4 15 Spring 0 1 0 1 +19920414 April 14, 1992 Wednesday April 1992 199204 Apr1992 4 14 105 4 16 Spring 0 1 0 1 +19920415 April 15, 1992 Thursday April 1992 199204 Apr1992 5 15 106 4 16 Spring 0 1 0 1 +19920416 April 16, 1992 Friday April 1992 199204 Apr1992 6 16 107 4 16 Spring 0 1 0 1 +19920417 April 17, 1992 Saturday April 1992 199204 Apr1992 7 17 108 4 16 Spring 1 1 0 0 +19920418 April 18, 1992 Sunday April 1992 199204 Apr1992 1 18 109 4 16 Spring 0 1 0 0 + +-- !test_lazy2 -- +19920401 April 1, 1992 Thursday April 1992 199204 5 4 Spring +19920410 April 10, 1992 Saturday April 1992 199204 7 4 Spring +19920411 April 11, 1992 Sunday April 1992 199204 1 4 Spring +19920412 April 12, 1992 Monday April 1992 199204 2 4 Spring +19920413 April 13, 1992 Tuesday April 1992 199204 3 4 Spring +19920414 April 14, 1992 Wednesday April 1992 199204 4 4 Spring +19920415 April 15, 1992 Thursday April 1992 199204 5 4 Spring +19920416 April 16, 1992 Friday April 1992 199204 6 4 Spring +19920417 April 17, 1992 Saturday April 1992 199204 7 4 Spring +19920418 April 18, 1992 Sunday April 1992 199204 1 4 Spring + +-- !test_lazy3 -- +19920401 April 1, 1992 Thursday April 1992 199204 Apr1992 5 1 92 4 14 Spring 0 1 0 1 +19920410 April 10, 1992 Saturday April 1992 199204 Apr1992 7 10 101 4 15 Spring 1 1 0 0 +19920411 April 11, 1992 Sunday April 1992 199204 Apr1992 1 11 102 4 15 Spring 0 1 0 0 +19920412 April 12, 1992 Monday April 1992 199204 Apr1992 2 12 103 4 15 Spring 0 1 0 1 +19920413 April 13, 1992 Tuesday 
April 1992 199204 Apr1992 3 13 104 4 15 Spring 0 1 0 1 +19920414 April 14, 1992 Wednesday April 1992 199204 Apr1992 4 14 105 4 16 Spring 0 1 0 1 +19920415 April 15, 1992 Thursday April 1992 199204 Apr1992 5 15 106 4 16 Spring 0 1 0 1 +19920416 April 16, 1992 Friday April 1992 199204 Apr1992 6 16 107 4 16 Spring 0 1 0 1 +19920417 April 17, 1992 Saturday April 1992 199204 Apr1992 7 17 108 4 16 Spring 1 1 0 0 +19920418 April 18, 1992 Sunday April 1992 199204 Apr1992 1 18 109 4 16 Spring 0 1 0 0 + +-- !test_lazy4 -- +19920401 April 1, 1992 Thursday April 1992 199204 5 4 Spring +19920410 April 10, 1992 Saturday April 1992 199204 7 4 Spring +19920411 April 11, 1992 Sunday April 1992 199204 1 4 Spring +19920412 April 12, 1992 Monday April 1992 199204 2 4 Spring +19920413 April 13, 1992 Tuesday April 1992 199204 3 4 Spring +19920414 April 14, 1992 Wednesday April 1992 199204 4 4 Spring +19920415 April 15, 1992 Thursday April 1992 199204 5 4 Spring +19920416 April 16, 1992 Friday April 1992 199204 6 4 Spring +19920417 April 17, 1992 Saturday April 1992 199204 7 4 Spring +19920418 April 18, 1992 Sunday April 1992 199204 1 4 Spring + +-- !test_lazy5 -- +1 Alice 101 100.50 +2 Bob 102 200.75 +3 Charlie 103 150.00 +4 David \N \N +5 Eve \N \N + +-- !test_lazy6 -- +100.50 Alice 101 1 +200.75 Bob 102 2 +150.00 Charlie 103 3 +\N David \N 4 +\N Eve \N 5 + +-- !test_lazy7 -- +Alice 101 1 100.50 +Bob 102 2 200.75 +Charlie 103 3 150.00 +David \N 4 \N +Eve \N 5 \N + diff --git a/regression-test/data/query_p0/topn_lazy/topn_lazy_on_data_model.out b/regression-test/data/query_p0/topn_lazy/topn_lazy_on_data_model.out new file mode 100644 index 00000000000000..f218255f299828 --- /dev/null +++ b/regression-test/data/query_p0/topn_lazy/topn_lazy_on_data_model.out @@ -0,0 +1,32 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !shape -- +PhysicalResultSink +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(mor.user_id,mor.username) lazySlots:(mor.age)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] +------------PhysicalProject +--------------filter((mor.__DORIS_DELETE_SIGN__ = 0)) +----------------PhysicalLazyMaterializeOlapScan[mor lazySlots:(mor.age)] + +-- !shape -- +PhysicalResultSink +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(mow.username) lazySlots:(mow.age,mow.user_id)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] +------------PhysicalProject +--------------filter((mow.__DORIS_DELETE_SIGN__ = 0)) +----------------PhysicalLazyMaterializeOlapScan[mow lazySlots:(mow.user_id,mow.age)] + +-- !shape -- +PhysicalResultSink +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(agg.user_id,agg.username) lazySlots:(agg.age)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] +------------PhysicalLazyMaterializeOlapScan[agg lazySlots:(agg.age)] + diff --git a/regression-test/data/shape_check/clickbench/query24.out b/regression-test/data/shape_check/clickbench/query24.out index fd0a2f5b670727..5aebbbfaa49ff5 100644 --- a/regression-test/data/shape_check/clickbench/query24.out +++ b/regression-test/data/shape_check/clickbench/query24.out @@ -1,9 +1,11 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ckbench_shape_24 -- -PhysicalDeferMaterializeResultSink ---PhysicalDeferMaterializeTopN -----PhysicalDistribute[DistributionSpecGather] -------PhysicalDeferMaterializeTopN ---------filter((URL like '%google%')) -----------PhysicalDeferMaterializeOlapScan[hits] +PhysicalResultSink +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(hits.EventTime,hits.URL) lazySlots:(hits.AdvEngineID,hits.Age,hits.BrowserCountry,hits.BrowserLanguage,hits.CLID,hits.ClientEventTime,hits.ClientIP,hits.ClientTimeZone,hits.CodeVersion,hits.ConnectTiming,hits.CookieEnable,hits.CounterClass,hits.CounterID,hits.DNSTiming,hits.DontCountHits,hits.EventDate,hits.FUniqID,hits.FetchTiming,hits.FlashMajor,hits.FlashMinor,hits.FlashMinor2,hits.FromTag,hits.GoodEvent,hits.HID,hits.HTTPError,hits.HasGCLID,hits.HistoryLength,hits.HitColor,hits.IPNetworkID,hits.Income,hits.Interests,hits.IsArtifical,hits.IsDownload,hits.IsEvent,hits.IsLink,hits.IsMobile,hits.IsNotBounce,hits.IsOldCounter,hits.IsParameter,hits.IsRefresh,hits.JavaEnable,hits.JavascriptEnable,hits.LocalEventTime,hits.MobilePhone,hits.MobilePhoneModel,hits.NetMajor,hits.NetMinor,hits.OS,hits.OpenerName,hits.OpenstatAdID,hits.OpenstatCampaignID,hits.OpenstatServiceName,hits.OpenstatSourceID,hits.OriginalURL,hits.PageCharset,hits.ParamCurrency,hits.ParamCurrencyID,hits.ParamOrderID,hits.ParamPrice,hits.Params,hits.Referer,hits.RefererCategoryID,hits.RefererHash,hits.RefererRegionID,hits.RegionID,hits.RemoteIP,hits.ResolutionDepth,hits.ResolutionHeight,hits.ResolutionWidth,hits.ResponseEndTiming,hits.ResponseStartTiming,hits.Robotness,hits.SearchEngineID,hits.SearchPhrase,hits.SendTiming,hits.Sex,hits.SilverlightVersion1,hits.SilverlightVersion2,hits.SilverlightVersion3,hits.SilverlightVersion4,hits.SocialAction,hits.SocialNetwork,hits.SocialSourceNetworkID,hits.SocialSourcePage,hits.Title,hits.TraficSourceID,hits.URLCategoryID,hits.URLHash,hits.URLRegionID,hits.UTMCampa
ign,hits.UTMContent,hits.UTMMedium,hits.UTMSource,hits.UTMTerm,hits.UserAgent,hits.UserAgentMajor,hits.UserAgentMinor,hits.UserID,hits.WatchID,hits.WindowClientHeight,hits.WindowClientWidth,hits.WindowName,hits.WithHash)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] +------------filter((URL like '%google%')) +--------------PhysicalLazyMaterializeOlapScan[hits lazySlots:(hits.CounterID,hits.EventDate,hits.UserID,hits.WatchID,hits.JavaEnable,hits.Title,hits.GoodEvent,hits.ClientIP,hits.RegionID,hits.CounterClass,hits.OS,hits.UserAgent,hits.Referer,hits.IsRefresh,hits.RefererCategoryID,hits.RefererRegionID,hits.URLCategoryID,hits.URLRegionID,hits.ResolutionWidth,hits.ResolutionHeight,hits.ResolutionDepth,hits.FlashMajor,hits.FlashMinor,hits.FlashMinor2,hits.NetMajor,hits.NetMinor,hits.UserAgentMajor,hits.UserAgentMinor,hits.CookieEnable,hits.JavascriptEnable,hits.IsMobile,hits.MobilePhone,hits.MobilePhoneModel,hits.Params,hits.IPNetworkID,hits.TraficSourceID,hits.SearchEngineID,hits.SearchPhrase,hits.AdvEngineID,hits.IsArtifical,hits.WindowClientWidth,hits.WindowClientHeight,hits.ClientTimeZone,hits.ClientEventTime,hits.SilverlightVersion1,hits.SilverlightVersion2,hits.SilverlightVersion3,hits.SilverlightVersion4,hits.PageCharset,hits.CodeVersion,hits.IsLink,hits.IsDownload,hits.IsNotBounce,hits.FUniqID,hits.OriginalURL,hits.HID,hits.IsOldCounter,hits.IsEvent,hits.IsParameter,hits.DontCountHits,hits.WithHash,hits.HitColor,hits.LocalEventTime,hits.Age,hits.Sex,hits.Income,hits.Interests,hits.Robotness,hits.RemoteIP,hits.WindowName,hits.OpenerName,hits.HistoryLength,hits.BrowserLanguage,hits.BrowserCountry,hits.SocialNetwork,hits.SocialAction,hits.HTTPError,hits.SendTiming,hits.DNSTiming,hits.ConnectTiming,hits.ResponseStartTiming,hits.ResponseEndTiming,hits.FetchTiming,hits.SocialSourceNetworkID,hits.SocialSourcePage,hits.ParamPrice,hits.ParamOrderID,hits.ParamCurrency,hits.ParamCurrencyID,hits.O
penstatServiceName,hits.OpenstatCampaignID,hits.OpenstatAdID,hits.OpenstatSourceID,hits.UTMSource,hits.UTMMedium,hits.UTMCampaign,hits.UTMContent,hits.UTMTerm,hits.FromTag,hits.HasGCLID,hits.RefererHash,hits.URLHash,hits.CLID)] diff --git a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query30.out b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query30.out index e956468874dbb5..5c6bc6f2c92eb5 100644 --- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query30.out +++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query30.out @@ -18,24 +18,26 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------filter((date_dim.d_year = 2002)) --------------------PhysicalOlapScan[date_dim] --PhysicalResultSink -----PhysicalTopN[MERGE_SORT] -------PhysicalDistribute[DistributionSpecGather] ---------PhysicalTopN[LOCAL_SORT] -----------PhysicalProject -------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) +----PhysicalProject +------PhysicalLazyMaterialize[materializedSlots:(customer.c_customer_id,ctr1.ctr_total_return) lazySlots:(customer.c_birth_country,customer.c_birth_day,customer.c_birth_month,customer.c_birth_year,customer.c_email_address,customer.c_first_name,customer.c_last_name,customer.c_last_review_date_sk,customer.c_login,customer.c_preferred_cust_flag,customer.c_salutation)] +--------PhysicalTopN[MERGE_SORT] +----------PhysicalDistribute[DistributionSpecGather] +------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] +----------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) 
otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[ctr_customer_sk] -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF2 +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] apply RFs: RF3 -------------------PhysicalProject ---------------------filter((customer_address.ca_state = 'IN')) -----------------------PhysicalOlapScan[customer_address] ---------------hashAgg[GLOBAL] -----------------PhysicalDistribute[DistributionSpecHash] -------------------hashAgg[LOCAL] ---------------------PhysicalDistribute[DistributionSpecExecutionAny] -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------------PhysicalProject +----------------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_birth_month,customer.c_birth_year,customer.c_birth_country,customer.c_login,customer.c_email_address,customer.c_last_review_date_sk,customer.c_salutation,customer.c_first_name,customer.c_last_name,customer.c_preferred_cust_flag,customer.c_birth_day)] apply RFs: RF3 +----------------------PhysicalProject +------------------------filter((customer_address.ca_state = 'IN')) +--------------------------PhysicalOlapScan[customer_address] +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] 
+----------------------hashAgg[LOCAL] +------------------------PhysicalDistribute[DistributionSpecExecutionAny] +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query44.out b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query44.out index c2cc91b7f43043..ee676398836ece 100644 --- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query44.out +++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query44.out @@ -1,69 +1,71 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !ds_shape_44 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN broadcast] hashCondition=((i2.i_item_sk = descending.item_sk)) otherCondition=() build RFs:RF1 item_sk->[i_item_sk] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(asceding.rnk) lazySlots:(best_performing,worst_performing)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------PhysicalOlapScan[item] apply RFs: RF1 -------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((i1.i_item_sk = asceding.item_sk)) otherCondition=() build RFs:RF0 item_sk->[i_item_sk] +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((i2.i_item_sk = descending.item_sk)) otherCondition=() build RFs:RF1 item_sk->[i_item_sk] ----------------PhysicalProject -------------------PhysicalOlapScan[item] apply RFs: RF0 +------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(i2.i_product_name)] apply RFs: RF1 ----------------PhysicalProject -------------------hashJoin[INNER_JOIN broadcast] hashCondition=((asceding.rnk = descending.rnk)) otherCondition=() 
+------------------hashJoin[INNER_JOIN broadcast] hashCondition=((i1.i_item_sk = asceding.item_sk)) otherCondition=() build RFs:RF0 item_sk->[i_item_sk] --------------------PhysicalProject -----------------------filter((rnk < 11)) -------------------------PhysicalWindow ---------------------------PhysicalQuickSort[MERGE_SORT] -----------------------------PhysicalDistribute[DistributionSpecGather] -------------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------------PhysicalPartitionTopN -----------------------------------PhysicalProject -------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) ---------------------------------------PhysicalProject -----------------------------------------hashAgg[GLOBAL] -------------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------------hashAgg[LOCAL] -----------------------------------------------PhysicalProject -------------------------------------------------filter((ss1.ss_store_sk = 146)) ---------------------------------------------------PhysicalOlapScan[store_sales] ---------------------------------------PhysicalProject -----------------------------------------PhysicalAssertNumRows -------------------------------------------PhysicalDistribute[DistributionSpecGather] ---------------------------------------------PhysicalProject -----------------------------------------------hashAgg[GLOBAL] -------------------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------------------hashAgg[LOCAL] -----------------------------------------------------PhysicalProject -------------------------------------------------------filter((store_sales.ss_store_sk = 146) and ss_addr_sk IS NULL) ---------------------------------------------------------PhysicalOlapScan[store_sales] +----------------------PhysicalLazyMaterializeOlapScan[item 
lazySlots:(i1.i_product_name)] apply RFs: RF0 --------------------PhysicalProject -----------------------filter((rnk < 11)) -------------------------PhysicalWindow ---------------------------PhysicalQuickSort[MERGE_SORT] -----------------------------PhysicalDistribute[DistributionSpecGather] -------------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------------PhysicalPartitionTopN -----------------------------------PhysicalProject -------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((asceding.rnk = descending.rnk)) otherCondition=() +------------------------PhysicalProject +--------------------------filter((rnk < 11)) +----------------------------PhysicalWindow +------------------------------PhysicalQuickSort[MERGE_SORT] +--------------------------------PhysicalDistribute[DistributionSpecGather] +----------------------------------PhysicalQuickSort[LOCAL_SORT] +------------------------------------PhysicalPartitionTopN --------------------------------------PhysicalProject -----------------------------------------hashAgg[GLOBAL] -------------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------------hashAgg[LOCAL] -----------------------------------------------PhysicalProject -------------------------------------------------filter((ss1.ss_store_sk = 146)) ---------------------------------------------------PhysicalOlapScan[store_sales] +----------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) +------------------------------------------PhysicalProject +--------------------------------------------hashAgg[GLOBAL] +----------------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------------hashAgg[LOCAL] 
+--------------------------------------------------PhysicalProject +----------------------------------------------------filter((ss1.ss_store_sk = 146)) +------------------------------------------------------PhysicalOlapScan[store_sales] +------------------------------------------PhysicalProject +--------------------------------------------PhysicalAssertNumRows +----------------------------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------------------------PhysicalProject +--------------------------------------------------hashAgg[GLOBAL] +----------------------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------------------hashAgg[LOCAL] +--------------------------------------------------------PhysicalProject +----------------------------------------------------------filter((store_sales.ss_store_sk = 146) and ss_addr_sk IS NULL) +------------------------------------------------------------PhysicalOlapScan[store_sales] +------------------------PhysicalProject +--------------------------filter((rnk < 11)) +----------------------------PhysicalWindow +------------------------------PhysicalQuickSort[MERGE_SORT] +--------------------------------PhysicalDistribute[DistributionSpecGather] +----------------------------------PhysicalQuickSort[LOCAL_SORT] +------------------------------------PhysicalPartitionTopN --------------------------------------PhysicalProject -----------------------------------------PhysicalAssertNumRows -------------------------------------------PhysicalDistribute[DistributionSpecGather] ---------------------------------------------PhysicalProject -----------------------------------------------hashAgg[GLOBAL] -------------------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------------------hashAgg[LOCAL] -----------------------------------------------------PhysicalProject 
-------------------------------------------------------filter((store_sales.ss_store_sk = 146) and ss_addr_sk IS NULL) ---------------------------------------------------------PhysicalOlapScan[store_sales] +----------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) +------------------------------------------PhysicalProject +--------------------------------------------hashAgg[GLOBAL] +----------------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------------hashAgg[LOCAL] +--------------------------------------------------PhysicalProject +----------------------------------------------------filter((ss1.ss_store_sk = 146)) +------------------------------------------------------PhysicalOlapScan[store_sales] +------------------------------------------PhysicalProject +--------------------------------------------PhysicalAssertNumRows +----------------------------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------------------------PhysicalProject +--------------------------------------------------hashAgg[GLOBAL] +----------------------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------------------hashAgg[LOCAL] +--------------------------------------------------------PhysicalProject +----------------------------------------------------------filter((store_sales.ss_store_sk = 146) and ss_addr_sk IS NULL) +------------------------------------------------------------PhysicalOlapScan[store_sales] diff --git a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query65.out b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query65.out index c19f18d7bfa1b6..58ec73205f4728 100644 --- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query65.out +++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query65.out 
@@ -1,41 +1,43 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !ds_shape_65 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((sb.ss_store_sk = sc.ss_store_sk)) otherCondition=((cast(revenue as DOUBLE) <= cast((0.1 * ave) as DOUBLE))) build RFs:RF4 ss_store_sk->[s_store_sk,ss_store_sk] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(store.s_store_name,item.i_item_desc,sc.revenue) lazySlots:(item.i_brand,item.i_current_price,item.i_wholesale_cost)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = sc.ss_item_sk)) otherCondition=() +--------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((sb.ss_store_sk = sc.ss_store_sk)) otherCondition=((cast(revenue as DOUBLE) <= cast((0.1 * ave) as DOUBLE))) build RFs:RF4 ss_store_sk->[s_store_sk,ss_store_sk] ----------------PhysicalProject -------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = sc.ss_store_sk)) otherCondition=() ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF4 -------------------------------PhysicalProject ---------------------------------filter((date_dim.d_month_seq <= 1232) and (date_dim.d_month_seq >= 1221)) 
-----------------------------------PhysicalOlapScan[date_dim] +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = sc.ss_item_sk)) otherCondition=() --------------------PhysicalProject -----------------------PhysicalOlapScan[store] apply RFs: RF4 -----------------PhysicalProject -------------------PhysicalOlapScan[item] -------------hashAgg[GLOBAL] ---------------PhysicalDistribute[DistributionSpecHash] -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = sc.ss_store_sk)) otherCondition=() +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF4 +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_month_seq <= 1232) and (date_dim.d_month_seq >= 1221)) +--------------------------------------PhysicalOlapScan[date_dim] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[store] apply RFs: RF4 +--------------------PhysicalProject 
+----------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(item.i_current_price,item.i_wholesale_cost,item.i_brand)] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] ------------------------------PhysicalProject ---------------------------------filter((date_dim.d_month_seq <= 1232) and (date_dim.d_month_seq >= 1221)) -----------------------------------PhysicalOlapScan[date_dim] +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_month_seq <= 1232) and (date_dim.d_month_seq >= 1221)) +--------------------------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query68.out b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query68.out index 0fd8ddd97ecd2c..fa40195e0fcefa 100644 --- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query68.out +++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query68.out @@ -1,38 +1,40 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ds_shape_68 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_addr_sk = current_addr.ca_address_sk)) otherCondition=(( not (ca_city = bought_city))) +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(customer.c_last_name,current_addr.ca_city,dn.bought_city,dn.ss_ticket_number,dn.extended_price,dn.extended_tax,dn.list_price) lazySlots:(customer.c_first_name)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_addr_sk = current_addr.ca_address_sk)) otherCondition=(( not (ca_city = bought_city))) ----------------PhysicalProject -------------------hashAgg[LOCAL] +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() +----------------------hashAgg[LOCAL] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] +--------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() ----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk 
= store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] --------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] ------------------------------------PhysicalProject ---------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +----------------------------------------PhysicalProject +------------------------------------------filter((date_dim.d_dom <= 2) and (date_dim.d_dom >= 1) and d_year IN (1998, 1999, 2000)) +--------------------------------------------PhysicalOlapScan[date_dim] ------------------------------------PhysicalProject ---------------------------------------filter((date_dim.d_dom <= 2) and (date_dim.d_dom >= 1) and d_year IN (1998, 1999, 2000)) -----------------------------------------PhysicalOlapScan[date_dim] +--------------------------------------filter(s_city IN ('Five Points', 'Pleasant Hill')) +----------------------------------------PhysicalOlapScan[store] --------------------------------PhysicalProject -----------------------------------filter(s_city IN ('Five Points', 'Pleasant Hill')) 
-------------------------------------PhysicalOlapScan[store] +----------------------------------filter(OR[(household_demographics.hd_dep_count = 8),(household_demographics.hd_vehicle_count = -1)]) +------------------------------------PhysicalOlapScan[household_demographics] ----------------------------PhysicalProject -------------------------------filter(OR[(household_demographics.hd_dep_count = 8),(household_demographics.hd_vehicle_count = -1)]) ---------------------------------PhysicalOlapScan[household_demographics] -------------------------PhysicalProject ---------------------------PhysicalOlapScan[customer_address] +------------------------------PhysicalOlapScan[customer_address] +--------------------PhysicalProject +----------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_first_name)] ----------------PhysicalProject -------------------PhysicalOlapScan[customer] -------------PhysicalProject ---------------PhysicalOlapScan[customer_address] +------------------PhysicalOlapScan[customer_address] diff --git a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query81.out b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query81.out index ff1229656de0c4..1495dc4c9f4eb8 100644 --- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query81.out +++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query81.out @@ -18,24 +18,26 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------filter((date_dim.d_year = 2002)) --------------------PhysicalOlapScan[date_dim] --PhysicalResultSink -----PhysicalTopN[MERGE_SORT] -------PhysicalDistribute[DistributionSpecGather] ---------PhysicalTopN[LOCAL_SORT] -----------PhysicalProject -------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF4 ca_address_sk->[c_current_addr_sk] +----PhysicalProject 
+------PhysicalLazyMaterialize[materializedSlots:(customer.c_customer_id,customer_address.ca_street_number,customer_address.ca_street_name,customer_address.ca_street_type,customer_address.ca_suite_number,customer_address.ca_city,customer_address.ca_county,customer_address.ca_state,customer_address.ca_zip,customer_address.ca_country,customer_address.ca_gmt_offset,customer_address.ca_location_type,ctr1.ctr_total_return) lazySlots:(customer.c_first_name,customer.c_last_name,customer.c_salutation)] +--------PhysicalTopN[MERGE_SORT] +----------PhysicalDistribute[DistributionSpecGather] +------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) +----------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF4 ca_address_sk->[c_current_addr_sk] ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[ctr_customer_sk] -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF2 +--------------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] apply RFs: RF4 -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalDistribute[DistributionSpecExecutionAny] +------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() --------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ---------------PhysicalProject -----------------filter((customer_address.ca_state = 'CA')) -------------------PhysicalOlapScan[customer_address] +--------------------------PhysicalProject +----------------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_last_name,customer.c_salutation,customer.c_first_name)] apply RFs: RF4 +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalDistribute[DistributionSpecExecutionAny] +------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------PhysicalProject +--------------------filter((customer_address.ca_state = 'CA')) +----------------------PhysicalOlapScan[customer_address] diff --git a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query30.out b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query30.out index 748165ced2fb2a..033748124648b8 100644 --- a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query30.out +++ b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query30.out @@ -18,24 +18,26 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------filter((date_dim.d_year = 2002)) --------------------PhysicalOlapScan[date_dim] --PhysicalResultSink -----PhysicalTopN[MERGE_SORT] -------PhysicalDistribute[DistributionSpecGather] ---------PhysicalTopN[LOCAL_SORT] -----------PhysicalProject -------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) build RFs:RF4 ctr_state->[ctr_state] +----PhysicalProject 
+------PhysicalLazyMaterialize[materializedSlots:(customer.c_customer_id,ctr1.ctr_total_return) lazySlots:(customer.c_birth_country,customer.c_birth_day,customer.c_birth_month,customer.c_birth_year,customer.c_email_address,customer.c_first_name,customer.c_last_name,customer.c_last_review_date_sk,customer.c_login,customer.c_preferred_cust_flag,customer.c_salutation)] +--------PhysicalTopN[MERGE_SORT] +----------PhysicalDistribute[DistributionSpecGather] +------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] +----------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) build RFs:RF4 ctr_state->[ctr_state] ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[ctr_customer_sk] -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF2 RF4 +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] apply RFs: RF3 -------------------PhysicalProject ---------------------filter((customer_address.ca_state = 'IN')) -----------------------PhysicalOlapScan[customer_address] ---------------hashAgg[GLOBAL] -----------------PhysicalDistribute[DistributionSpecHash] -------------------hashAgg[LOCAL] ---------------------PhysicalDistribute[DistributionSpecExecutionAny] -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) 
+------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[ctr_customer_sk] +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF2 RF4 +--------------------------PhysicalProject +----------------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_birth_month,customer.c_birth_year,customer.c_birth_country,customer.c_login,customer.c_email_address,customer.c_last_review_date_sk,customer.c_salutation,customer.c_first_name,customer.c_last_name,customer.c_preferred_cust_flag,customer.c_birth_day)] apply RFs: RF3 +----------------------PhysicalProject +------------------------filter((customer_address.ca_state = 'IN')) +--------------------------PhysicalOlapScan[customer_address] +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalDistribute[DistributionSpecExecutionAny] +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query44.out b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query44.out index c2cc91b7f43043..ee676398836ece 100644 --- a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query44.out +++ b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query44.out @@ -1,69 +1,71 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ds_shape_44 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN broadcast] hashCondition=((i2.i_item_sk = descending.item_sk)) otherCondition=() build RFs:RF1 item_sk->[i_item_sk] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(asceding.rnk) lazySlots:(best_performing,worst_performing)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------PhysicalOlapScan[item] apply RFs: RF1 -------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((i1.i_item_sk = asceding.item_sk)) otherCondition=() build RFs:RF0 item_sk->[i_item_sk] +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((i2.i_item_sk = descending.item_sk)) otherCondition=() build RFs:RF1 item_sk->[i_item_sk] ----------------PhysicalProject -------------------PhysicalOlapScan[item] apply RFs: RF0 +------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(i2.i_product_name)] apply RFs: RF1 ----------------PhysicalProject -------------------hashJoin[INNER_JOIN broadcast] hashCondition=((asceding.rnk = descending.rnk)) otherCondition=() +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((i1.i_item_sk = asceding.item_sk)) otherCondition=() build RFs:RF0 item_sk->[i_item_sk] --------------------PhysicalProject -----------------------filter((rnk < 11)) -------------------------PhysicalWindow ---------------------------PhysicalQuickSort[MERGE_SORT] -----------------------------PhysicalDistribute[DistributionSpecGather] -------------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------------PhysicalPartitionTopN -----------------------------------PhysicalProject 
-------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) ---------------------------------------PhysicalProject -----------------------------------------hashAgg[GLOBAL] -------------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------------hashAgg[LOCAL] -----------------------------------------------PhysicalProject -------------------------------------------------filter((ss1.ss_store_sk = 146)) ---------------------------------------------------PhysicalOlapScan[store_sales] ---------------------------------------PhysicalProject -----------------------------------------PhysicalAssertNumRows -------------------------------------------PhysicalDistribute[DistributionSpecGather] ---------------------------------------------PhysicalProject -----------------------------------------------hashAgg[GLOBAL] -------------------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------------------hashAgg[LOCAL] -----------------------------------------------------PhysicalProject -------------------------------------------------------filter((store_sales.ss_store_sk = 146) and ss_addr_sk IS NULL) ---------------------------------------------------------PhysicalOlapScan[store_sales] +----------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(i1.i_product_name)] apply RFs: RF0 --------------------PhysicalProject -----------------------filter((rnk < 11)) -------------------------PhysicalWindow ---------------------------PhysicalQuickSort[MERGE_SORT] -----------------------------PhysicalDistribute[DistributionSpecGather] -------------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------------PhysicalPartitionTopN -----------------------------------PhysicalProject -------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as 
DOUBLE)) +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((asceding.rnk = descending.rnk)) otherCondition=() +------------------------PhysicalProject +--------------------------filter((rnk < 11)) +----------------------------PhysicalWindow +------------------------------PhysicalQuickSort[MERGE_SORT] +--------------------------------PhysicalDistribute[DistributionSpecGather] +----------------------------------PhysicalQuickSort[LOCAL_SORT] +------------------------------------PhysicalPartitionTopN --------------------------------------PhysicalProject -----------------------------------------hashAgg[GLOBAL] -------------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------------hashAgg[LOCAL] -----------------------------------------------PhysicalProject -------------------------------------------------filter((ss1.ss_store_sk = 146)) ---------------------------------------------------PhysicalOlapScan[store_sales] +----------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) +------------------------------------------PhysicalProject +--------------------------------------------hashAgg[GLOBAL] +----------------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------------hashAgg[LOCAL] +--------------------------------------------------PhysicalProject +----------------------------------------------------filter((ss1.ss_store_sk = 146)) +------------------------------------------------------PhysicalOlapScan[store_sales] +------------------------------------------PhysicalProject +--------------------------------------------PhysicalAssertNumRows +----------------------------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------------------------PhysicalProject +--------------------------------------------------hashAgg[GLOBAL] 
+----------------------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------------------hashAgg[LOCAL] +--------------------------------------------------------PhysicalProject +----------------------------------------------------------filter((store_sales.ss_store_sk = 146) and ss_addr_sk IS NULL) +------------------------------------------------------------PhysicalOlapScan[store_sales] +------------------------PhysicalProject +--------------------------filter((rnk < 11)) +----------------------------PhysicalWindow +------------------------------PhysicalQuickSort[MERGE_SORT] +--------------------------------PhysicalDistribute[DistributionSpecGather] +----------------------------------PhysicalQuickSort[LOCAL_SORT] +------------------------------------PhysicalPartitionTopN --------------------------------------PhysicalProject -----------------------------------------PhysicalAssertNumRows -------------------------------------------PhysicalDistribute[DistributionSpecGather] ---------------------------------------------PhysicalProject -----------------------------------------------hashAgg[GLOBAL] -------------------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------------------hashAgg[LOCAL] -----------------------------------------------------PhysicalProject -------------------------------------------------------filter((store_sales.ss_store_sk = 146) and ss_addr_sk IS NULL) ---------------------------------------------------------PhysicalOlapScan[store_sales] +----------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) +------------------------------------------PhysicalProject +--------------------------------------------hashAgg[GLOBAL] +----------------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------------hashAgg[LOCAL] 
+--------------------------------------------------PhysicalProject +----------------------------------------------------filter((ss1.ss_store_sk = 146)) +------------------------------------------------------PhysicalOlapScan[store_sales] +------------------------------------------PhysicalProject +--------------------------------------------PhysicalAssertNumRows +----------------------------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------------------------PhysicalProject +--------------------------------------------------hashAgg[GLOBAL] +----------------------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------------------hashAgg[LOCAL] +--------------------------------------------------------PhysicalProject +----------------------------------------------------------filter((store_sales.ss_store_sk = 146) and ss_addr_sk IS NULL) +------------------------------------------------------------PhysicalOlapScan[store_sales] diff --git a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query65.out b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query65.out index 4dd67a91e1e98e..c521ab31e4f4f0 100644 --- a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query65.out +++ b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query65.out @@ -1,41 +1,43 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ds_shape_65 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((sb.ss_store_sk = sc.ss_store_sk)) otherCondition=((cast(revenue as DOUBLE) <= cast((0.1 * ave) as DOUBLE))) build RFs:RF4 ss_store_sk->[s_store_sk,ss_store_sk] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(store.s_store_name,item.i_item_desc,sc.revenue) lazySlots:(item.i_brand,item.i_current_price,item.i_wholesale_cost)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = sc.ss_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ss_item_sk] +--------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((sb.ss_store_sk = sc.ss_store_sk)) otherCondition=((cast(revenue as DOUBLE) <= cast((0.1 * ave) as DOUBLE))) build RFs:RF4 ss_store_sk->[s_store_sk,ss_store_sk] ----------------PhysicalProject -------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = sc.ss_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 RF4 -------------------------------PhysicalProject ---------------------------------filter((date_dim.d_month_seq <= 1232) and 
(date_dim.d_month_seq >= 1221)) -----------------------------------PhysicalOlapScan[date_dim] +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = sc.ss_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ss_item_sk] --------------------PhysicalProject -----------------------PhysicalOlapScan[store] apply RFs: RF4 -----------------PhysicalProject -------------------PhysicalOlapScan[item] -------------hashAgg[GLOBAL] ---------------PhysicalDistribute[DistributionSpecHash] -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = sc.ss_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 RF4 +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_month_seq <= 1232) and (date_dim.d_month_seq >= 1221)) +--------------------------------------PhysicalOlapScan[date_dim] +------------------------PhysicalProject 
+--------------------------PhysicalOlapScan[store] apply RFs: RF4 +--------------------PhysicalProject +----------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(item.i_current_price,item.i_wholesale_cost,item.i_brand)] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] ------------------------------PhysicalProject ---------------------------------filter((date_dim.d_month_seq <= 1232) and (date_dim.d_month_seq >= 1221)) -----------------------------------PhysicalOlapScan[date_dim] +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_month_seq <= 1232) and (date_dim.d_month_seq >= 1221)) +--------------------------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query68.out b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query68.out index 20e70268ebb862..a417594ba5b43a 100644 --- a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query68.out +++ b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query68.out @@ -1,38 +1,40 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ds_shape_68 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_addr_sk = current_addr.ca_address_sk)) otherCondition=(( not (ca_city = bought_city))) build RFs:RF5 ca_address_sk->[c_current_addr_sk] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(customer.c_last_name,current_addr.ca_city,dn.bought_city,dn.ss_ticket_number,dn.extended_price,dn.extended_tax,dn.list_price) lazySlots:(customer.c_first_name)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 c_customer_sk->[ss_customer_sk] +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_addr_sk = current_addr.ca_address_sk)) otherCondition=(( not (ca_city = bought_city))) build RFs:RF5 ca_address_sk->[c_current_addr_sk] ----------------PhysicalProject -------------------hashAgg[LOCAL] +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 c_customer_sk->[ss_customer_sk] --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[ss_addr_sk] +----------------------hashAgg[LOCAL] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] +--------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[ss_addr_sk] ----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] --------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] ------------------------------------PhysicalProject ---------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3 RF4 +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3 RF4 +----------------------------------------PhysicalProject +------------------------------------------filter((date_dim.d_dom <= 2) and (date_dim.d_dom >= 1) and d_year IN (1998, 1999, 2000)) +--------------------------------------------PhysicalOlapScan[date_dim] ------------------------------------PhysicalProject ---------------------------------------filter((date_dim.d_dom <= 2) and (date_dim.d_dom >= 1) and d_year IN (1998, 1999, 2000)) 
-----------------------------------------PhysicalOlapScan[date_dim] +--------------------------------------filter(s_city IN ('Five Points', 'Pleasant Hill')) +----------------------------------------PhysicalOlapScan[store] --------------------------------PhysicalProject -----------------------------------filter(s_city IN ('Five Points', 'Pleasant Hill')) -------------------------------------PhysicalOlapScan[store] +----------------------------------filter(OR[(household_demographics.hd_dep_count = 8),(household_demographics.hd_vehicle_count = -1)]) +------------------------------------PhysicalOlapScan[household_demographics] ----------------------------PhysicalProject -------------------------------filter(OR[(household_demographics.hd_dep_count = 8),(household_demographics.hd_vehicle_count = -1)]) ---------------------------------PhysicalOlapScan[household_demographics] -------------------------PhysicalProject ---------------------------PhysicalOlapScan[customer_address] +------------------------------PhysicalOlapScan[customer_address] +--------------------PhysicalProject +----------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_first_name)] apply RFs: RF5 ----------------PhysicalProject -------------------PhysicalOlapScan[customer] apply RFs: RF5 -------------PhysicalProject ---------------PhysicalOlapScan[customer_address] +------------------PhysicalOlapScan[customer_address] diff --git a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query81.out b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query81.out index 19406180e00551..721c598bde1885 100644 --- a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query81.out +++ b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query81.out @@ -18,24 +18,26 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------filter((date_dim.d_year = 2002)) --------------------PhysicalOlapScan[date_dim] --PhysicalResultSink -----PhysicalTopN[MERGE_SORT] 
-------PhysicalDistribute[DistributionSpecGather] ---------PhysicalTopN[LOCAL_SORT] -----------PhysicalProject -------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF4 ca_address_sk->[c_current_addr_sk] +----PhysicalProject +------PhysicalLazyMaterialize[materializedSlots:(customer.c_customer_id,customer_address.ca_street_number,customer_address.ca_street_name,customer_address.ca_street_type,customer_address.ca_suite_number,customer_address.ca_city,customer_address.ca_county,customer_address.ca_state,customer_address.ca_zip,customer_address.ca_country,customer_address.ca_gmt_offset,customer_address.ca_location_type,ctr1.ctr_total_return) lazySlots:(customer.c_first_name,customer.c_last_name,customer.c_salutation)] +--------PhysicalTopN[MERGE_SORT] +----------PhysicalDistribute[DistributionSpecGather] +------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) build RFs:RF3 ctr_state->[ctr_state] +----------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF4 ca_address_sk->[c_current_addr_sk] ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[ctr_customer_sk] -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF2 RF3 +--------------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) build RFs:RF3 ctr_state->[ctr_state] 
----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] apply RFs: RF4 -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalDistribute[DistributionSpecExecutionAny] ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ---------------PhysicalProject -----------------filter((customer_address.ca_state = 'CA')) -------------------PhysicalOlapScan[customer_address] +------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[ctr_customer_sk] +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF2 RF3 +--------------------------PhysicalProject +----------------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_last_name,customer.c_salutation,customer.c_first_name)] apply RFs: RF4 +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalDistribute[DistributionSpecExecutionAny] +------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------PhysicalProject +--------------------filter((customer_address.ca_state = 'CA')) +----------------------PhysicalOlapScan[customer_address] diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query30.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query30.out index 096e57d1a8103e..015d1e28fcc819 100644 --- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query30.out +++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query30.out @@ -18,24 +18,26 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------filter((date_dim.d_year = 2002)) ------------------------PhysicalOlapScan[date_dim] --PhysicalResultSink -----PhysicalTopN[MERGE_SORT] 
-------PhysicalDistribute[DistributionSpecGather] ---------PhysicalTopN[LOCAL_SORT] -----------PhysicalProject -------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) +----PhysicalProject +------PhysicalLazyMaterialize[materializedSlots:(customer.c_customer_id,ctr1.ctr_total_return) lazySlots:(customer.c_birth_country,customer.c_birth_day,customer.c_birth_month,customer.c_birth_year,customer.c_email_address,customer.c_first_name,customer.c_last_name,customer.c_last_review_date_sk,customer.c_login,customer.c_preferred_cust_flag,customer.c_salutation)] +--------PhysicalTopN[MERGE_SORT] +----------PhysicalDistribute[DistributionSpecGather] +------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 ctr_customer_sk->[c_customer_sk] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] apply RFs: RF2 RF3 -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) -------------------PhysicalProject ---------------------filter((customer_address.ca_state = 'IN')) 
-----------------------PhysicalOlapScan[customer_address] ---------------hashAgg[GLOBAL] -----------------PhysicalDistribute[DistributionSpecHash] -------------------hashAgg[LOCAL] ---------------------PhysicalDistribute[DistributionSpecExecutionAny] -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 ctr_customer_sk->[c_customer_sk] +--------------------------PhysicalProject +----------------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_birth_month,customer.c_birth_year,customer.c_birth_country,customer.c_login,customer.c_email_address,customer.c_last_review_date_sk,customer.c_salutation,customer.c_first_name,customer.c_last_name,customer.c_preferred_cust_flag,customer.c_birth_day)] apply RFs: RF2 RF3 +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------PhysicalProject +------------------------filter((customer_address.ca_state = 'IN')) +--------------------------PhysicalOlapScan[customer_address] +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalDistribute[DistributionSpecExecutionAny] +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query44.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query44.out index 86d157354860a4..ba76d2e22bcc43 100644 --- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query44.out +++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query44.out @@ -1,69 +1,71 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ds_shape_44 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN broadcast] hashCondition=((asceding.rnk = descending.rnk)) otherCondition=() +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(asceding.rnk) lazySlots:(best_performing,worst_performing)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((i1.i_item_sk = asceding.item_sk)) otherCondition=() build RFs:RF1 item_sk->[i_item_sk] +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((asceding.rnk = descending.rnk)) otherCondition=() ----------------PhysicalProject -------------------PhysicalOlapScan[item] apply RFs: RF1 -----------------PhysicalProject -------------------filter((rnk < 11)) ---------------------PhysicalWindow -----------------------PhysicalQuickSort[MERGE_SORT] -------------------------PhysicalDistribute[DistributionSpecGather] ---------------------------PhysicalQuickSort[LOCAL_SORT] -----------------------------PhysicalPartitionTopN -------------------------------PhysicalProject ---------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) -----------------------------------PhysicalProject -------------------------------------hashAgg[GLOBAL] ---------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------hashAgg[LOCAL] -------------------------------------------PhysicalProject ---------------------------------------------filter((ss1.ss_store_sk = 146)) -----------------------------------------------PhysicalOlapScan[store_sales] +------------------hashJoin[INNER_JOIN bucketShuffle] 
hashCondition=((i1.i_item_sk = asceding.item_sk)) otherCondition=() build RFs:RF1 item_sk->[i_item_sk] +--------------------PhysicalProject +----------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(i1.i_product_name)] apply RFs: RF1 +--------------------PhysicalProject +----------------------filter((rnk < 11)) +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[MERGE_SORT] +----------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------PhysicalQuickSort[LOCAL_SORT] +--------------------------------PhysicalPartitionTopN ----------------------------------PhysicalProject -------------------------------------PhysicalAssertNumRows ---------------------------------------PhysicalDistribute[DistributionSpecGather] -----------------------------------------PhysicalProject -------------------------------------------hashAgg[GLOBAL] ---------------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------------hashAgg[LOCAL] -------------------------------------------------PhysicalProject ---------------------------------------------------filter((store_sales.ss_store_sk = 146) and ss_addr_sk IS NULL) -----------------------------------------------------PhysicalOlapScan[store_sales] -------------PhysicalProject ---------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((i2.i_item_sk = descending.item_sk)) otherCondition=() build RFs:RF0 item_sk->[i_item_sk] +------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) +--------------------------------------PhysicalProject +----------------------------------------hashAgg[GLOBAL] +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashAgg[LOCAL] +----------------------------------------------PhysicalProject 
+------------------------------------------------filter((ss1.ss_store_sk = 146)) +--------------------------------------------------PhysicalOlapScan[store_sales] +--------------------------------------PhysicalProject +----------------------------------------PhysicalAssertNumRows +------------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------------PhysicalProject +----------------------------------------------hashAgg[GLOBAL] +------------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------------hashAgg[LOCAL] +----------------------------------------------------PhysicalProject +------------------------------------------------------filter((store_sales.ss_store_sk = 146) and ss_addr_sk IS NULL) +--------------------------------------------------------PhysicalOlapScan[store_sales] ----------------PhysicalProject -------------------PhysicalOlapScan[item] apply RFs: RF0 -----------------PhysicalProject -------------------filter((rnk < 11)) ---------------------PhysicalWindow -----------------------PhysicalQuickSort[MERGE_SORT] -------------------------PhysicalDistribute[DistributionSpecGather] ---------------------------PhysicalQuickSort[LOCAL_SORT] -----------------------------PhysicalPartitionTopN -------------------------------PhysicalProject ---------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) -----------------------------------PhysicalProject -------------------------------------hashAgg[GLOBAL] ---------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------hashAgg[LOCAL] -------------------------------------------PhysicalProject ---------------------------------------------filter((ss1.ss_store_sk = 146)) -----------------------------------------------PhysicalOlapScan[store_sales] 
+------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((i2.i_item_sk = descending.item_sk)) otherCondition=() build RFs:RF0 item_sk->[i_item_sk] +--------------------PhysicalProject +----------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(i2.i_product_name)] apply RFs: RF0 +--------------------PhysicalProject +----------------------filter((rnk < 11)) +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[MERGE_SORT] +----------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------PhysicalQuickSort[LOCAL_SORT] +--------------------------------PhysicalPartitionTopN ----------------------------------PhysicalProject -------------------------------------PhysicalAssertNumRows ---------------------------------------PhysicalDistribute[DistributionSpecGather] -----------------------------------------PhysicalProject -------------------------------------------hashAgg[GLOBAL] ---------------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------------hashAgg[LOCAL] -------------------------------------------------PhysicalProject ---------------------------------------------------filter((store_sales.ss_store_sk = 146) and ss_addr_sk IS NULL) -----------------------------------------------------PhysicalOlapScan[store_sales] +------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) +--------------------------------------PhysicalProject +----------------------------------------hashAgg[GLOBAL] +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashAgg[LOCAL] +----------------------------------------------PhysicalProject +------------------------------------------------filter((ss1.ss_store_sk = 146)) +--------------------------------------------------PhysicalOlapScan[store_sales] 
+--------------------------------------PhysicalProject +----------------------------------------PhysicalAssertNumRows +------------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------------PhysicalProject +----------------------------------------------hashAgg[GLOBAL] +------------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------------hashAgg[LOCAL] +----------------------------------------------------PhysicalProject +------------------------------------------------------filter((store_sales.ss_store_sk = 146) and ss_addr_sk IS NULL) +--------------------------------------------------------PhysicalOlapScan[store_sales] diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query65.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query65.out index 991425ccc20647..9b233769312c7a 100644 --- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query65.out +++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query65.out @@ -1,41 +1,43 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ds_shape_65 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN broadcast] hashCondition=((sb.ss_store_sk = sc.ss_store_sk)) otherCondition=((cast(revenue as DOUBLE) <= cast((0.1 * ave) as DOUBLE))) build RFs:RF4 ss_store_sk->[s_store_sk,ss_store_sk] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(store.s_store_name,item.i_item_desc,sc.revenue) lazySlots:(item.i_brand,item.i_current_price,item.i_wholesale_cost)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = sc.ss_store_sk)) otherCondition=() +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((sb.ss_store_sk = sc.ss_store_sk)) otherCondition=((cast(revenue as DOUBLE) <= cast((0.1 * ave) as DOUBLE))) build RFs:RF4 ss_store_sk->[s_store_sk,ss_store_sk] ----------------PhysicalProject -------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = sc.ss_item_sk)) otherCondition=() ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF4 -------------------------------PhysicalProject ---------------------------------filter((date_dim.d_month_seq <= 1232) and (date_dim.d_month_seq >= 1221)) -----------------------------------PhysicalOlapScan[date_dim] 
+------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = sc.ss_store_sk)) otherCondition=() --------------------PhysicalProject -----------------------PhysicalOlapScan[item] -----------------PhysicalProject -------------------PhysicalOlapScan[store] apply RFs: RF4 -------------hashAgg[GLOBAL] ---------------PhysicalDistribute[DistributionSpecHash] -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = sc.ss_item_sk)) otherCondition=() +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF4 +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_month_seq <= 1232) and (date_dim.d_month_seq >= 1221)) +--------------------------------------PhysicalOlapScan[date_dim] +------------------------PhysicalProject +--------------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(item.i_current_price,item.i_wholesale_cost,item.i_brand)] +--------------------PhysicalProject 
+----------------------PhysicalOlapScan[store] apply RFs: RF4 +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] ------------------------------PhysicalProject ---------------------------------filter((date_dim.d_month_seq <= 1232) and (date_dim.d_month_seq >= 1221)) -----------------------------------PhysicalOlapScan[date_dim] +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_month_seq <= 1232) and (date_dim.d_month_seq >= 1221)) +--------------------------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query68.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query68.out index aa07d1b2a42d9b..f93e4120b2382a 100644 --- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query68.out +++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query68.out @@ -1,38 +1,40 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ds_shape_68 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_current_addr_sk = current_addr.ca_address_sk)) otherCondition=(( not (ca_city = bought_city))) build RFs:RF5 c_current_addr_sk->[ca_address_sk] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(customer.c_last_name,current_addr.ca_city,dn.bought_city,dn.ss_ticket_number,dn.extended_price,dn.extended_tax,dn.list_price) lazySlots:(customer.c_first_name)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------PhysicalOlapScan[customer_address] apply RFs: RF5 -------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 ss_customer_sk->[c_customer_sk] +--------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_current_addr_sk = current_addr.ca_address_sk)) otherCondition=(( not (ca_city = bought_city))) build RFs:RF5 c_current_addr_sk->[ca_address_sk] ----------------PhysicalProject -------------------PhysicalOlapScan[customer] apply RFs: RF4 +------------------PhysicalOlapScan[customer_address] apply RFs: RF5 ----------------PhysicalProject -------------------hashAgg[LOCAL] +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 ss_customer_sk->[c_customer_sk] --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ss_addr_sk->[ca_address_sk] -------------------------PhysicalProject 
---------------------------PhysicalOlapScan[customer_address] apply RFs: RF3 +----------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_first_name)] apply RFs: RF4 +--------------------PhysicalProject +----------------------hashAgg[LOCAL] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] +--------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ss_addr_sk->[ca_address_sk] ----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +------------------------------PhysicalOlapScan[customer_address] apply RFs: RF3 +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] --------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] ------------------------------------PhysicalProject ---------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 
d_date_sk->[ss_sold_date_sk] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +----------------------------------------PhysicalProject +------------------------------------------filter((date_dim.d_dom <= 2) and (date_dim.d_dom >= 1) and d_year IN (1998, 1999, 2000)) +--------------------------------------------PhysicalOlapScan[date_dim] ------------------------------------PhysicalProject ---------------------------------------filter((date_dim.d_dom <= 2) and (date_dim.d_dom >= 1) and d_year IN (1998, 1999, 2000)) -----------------------------------------PhysicalOlapScan[date_dim] +--------------------------------------filter(s_city IN ('Five Points', 'Pleasant Hill')) +----------------------------------------PhysicalOlapScan[store] --------------------------------PhysicalProject -----------------------------------filter(s_city IN ('Five Points', 'Pleasant Hill')) -------------------------------------PhysicalOlapScan[store] -----------------------------PhysicalProject -------------------------------filter(OR[(household_demographics.hd_dep_count = 8),(household_demographics.hd_vehicle_count = -1)]) ---------------------------------PhysicalOlapScan[household_demographics] +----------------------------------filter(OR[(household_demographics.hd_dep_count = 8),(household_demographics.hd_vehicle_count = -1)]) +------------------------------------PhysicalOlapScan[household_demographics] diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query81.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query81.out index bede6759759191..8e8de0a05f3be0 100644 --- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query81.out +++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query81.out @@ -18,24 +18,26 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------PhysicalProject ------------------PhysicalOlapScan[customer_address] 
--PhysicalResultSink -----PhysicalTopN[MERGE_SORT] -------PhysicalDistribute[DistributionSpecGather] ---------PhysicalTopN[LOCAL_SORT] -----------PhysicalProject -------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) +----PhysicalProject +------PhysicalLazyMaterialize[materializedSlots:(customer.c_customer_id,customer_address.ca_street_number,customer_address.ca_street_name,customer_address.ca_street_type,customer_address.ca_suite_number,customer_address.ca_city,customer_address.ca_county,customer_address.ca_state,customer_address.ca_zip,customer_address.ca_country,customer_address.ca_gmt_offset,customer_address.ca_location_type,ctr1.ctr_total_return) lazySlots:(customer.c_first_name,customer.c_last_name,customer.c_salutation)] +--------PhysicalTopN[MERGE_SORT] +----------PhysicalDistribute[DistributionSpecGather] +------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[ctr_customer_sk] -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF2 +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] ----------------------PhysicalProject 
-------------------------PhysicalOlapScan[customer] apply RFs: RF3 -------------------PhysicalProject ---------------------filter((customer_address.ca_state = 'CA')) -----------------------PhysicalOlapScan[customer_address] ---------------hashAgg[GLOBAL] -----------------PhysicalDistribute[DistributionSpecHash] -------------------hashAgg[LOCAL] ---------------------PhysicalDistribute[DistributionSpecExecutionAny] -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------------PhysicalProject +----------------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_last_name,customer.c_salutation,customer.c_first_name)] apply RFs: RF3 +----------------------PhysicalProject +------------------------filter((customer_address.ca_state = 'CA')) +--------------------------PhysicalOlapScan[customer_address] +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalDistribute[DistributionSpecExecutionAny] +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query30.out b/regression-test/data/shape_check/tpcds_sf100/shape/query30.out index 8f09a0854e047b..195a2f6c6902ed 100644 --- a/regression-test/data/shape_check/tpcds_sf100/shape/query30.out +++ b/regression-test/data/shape_check/tpcds_sf100/shape/query30.out @@ -18,24 +18,26 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------filter((date_dim.d_year = 2002)) ------------------------PhysicalOlapScan[date_dim] --PhysicalResultSink -----PhysicalTopN[MERGE_SORT] -------PhysicalDistribute[DistributionSpecGather] ---------PhysicalTopN[LOCAL_SORT] -----------PhysicalProject 
-------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) build RFs:RF4 ctr_state->[ctr_state] +----PhysicalProject +------PhysicalLazyMaterialize[materializedSlots:(customer.c_customer_id,ctr1.ctr_total_return) lazySlots:(customer.c_birth_country,customer.c_birth_day,customer.c_birth_month,customer.c_birth_year,customer.c_email_address,customer.c_first_name,customer.c_last_name,customer.c_last_review_date_sk,customer.c_login,customer.c_preferred_cust_flag,customer.c_salutation)] +--------PhysicalTopN[MERGE_SORT] +----------PhysicalDistribute[DistributionSpecGather] +------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) build RFs:RF4 ctr_state->[ctr_state] ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 ctr_customer_sk->[c_customer_sk] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] apply RFs: RF2 RF3 -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4 -------------------PhysicalProject ---------------------filter((customer_address.ca_state = 'IN')) 
-----------------------PhysicalOlapScan[customer_address] ---------------hashAgg[GLOBAL] -----------------PhysicalDistribute[DistributionSpecHash] -------------------hashAgg[LOCAL] ---------------------PhysicalDistribute[DistributionSpecExecutionAny] -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 ctr_customer_sk->[c_customer_sk] +--------------------------PhysicalProject +----------------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_birth_month,customer.c_birth_year,customer.c_birth_country,customer.c_login,customer.c_email_address,customer.c_last_review_date_sk,customer.c_salutation,customer.c_first_name,customer.c_last_name,customer.c_preferred_cust_flag,customer.c_birth_day)] apply RFs: RF2 RF3 +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4 +----------------------PhysicalProject +------------------------filter((customer_address.ca_state = 'IN')) +--------------------------PhysicalOlapScan[customer_address] +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalDistribute[DistributionSpecExecutionAny] +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query44.out b/regression-test/data/shape_check/tpcds_sf100/shape/query44.out index 86d157354860a4..ba76d2e22bcc43 100644 --- a/regression-test/data/shape_check/tpcds_sf100/shape/query44.out +++ b/regression-test/data/shape_check/tpcds_sf100/shape/query44.out @@ -1,69 +1,71 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ds_shape_44 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN broadcast] hashCondition=((asceding.rnk = descending.rnk)) otherCondition=() +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(asceding.rnk) lazySlots:(best_performing,worst_performing)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((i1.i_item_sk = asceding.item_sk)) otherCondition=() build RFs:RF1 item_sk->[i_item_sk] +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((asceding.rnk = descending.rnk)) otherCondition=() ----------------PhysicalProject -------------------PhysicalOlapScan[item] apply RFs: RF1 -----------------PhysicalProject -------------------filter((rnk < 11)) ---------------------PhysicalWindow -----------------------PhysicalQuickSort[MERGE_SORT] -------------------------PhysicalDistribute[DistributionSpecGather] ---------------------------PhysicalQuickSort[LOCAL_SORT] -----------------------------PhysicalPartitionTopN -------------------------------PhysicalProject ---------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) -----------------------------------PhysicalProject -------------------------------------hashAgg[GLOBAL] ---------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------hashAgg[LOCAL] -------------------------------------------PhysicalProject ---------------------------------------------filter((ss1.ss_store_sk = 146)) -----------------------------------------------PhysicalOlapScan[store_sales] +------------------hashJoin[INNER_JOIN bucketShuffle] 
hashCondition=((i1.i_item_sk = asceding.item_sk)) otherCondition=() build RFs:RF1 item_sk->[i_item_sk] +--------------------PhysicalProject +----------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(i1.i_product_name)] apply RFs: RF1 +--------------------PhysicalProject +----------------------filter((rnk < 11)) +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[MERGE_SORT] +----------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------PhysicalQuickSort[LOCAL_SORT] +--------------------------------PhysicalPartitionTopN ----------------------------------PhysicalProject -------------------------------------PhysicalAssertNumRows ---------------------------------------PhysicalDistribute[DistributionSpecGather] -----------------------------------------PhysicalProject -------------------------------------------hashAgg[GLOBAL] ---------------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------------hashAgg[LOCAL] -------------------------------------------------PhysicalProject ---------------------------------------------------filter((store_sales.ss_store_sk = 146) and ss_addr_sk IS NULL) -----------------------------------------------------PhysicalOlapScan[store_sales] -------------PhysicalProject ---------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((i2.i_item_sk = descending.item_sk)) otherCondition=() build RFs:RF0 item_sk->[i_item_sk] +------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) +--------------------------------------PhysicalProject +----------------------------------------hashAgg[GLOBAL] +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashAgg[LOCAL] +----------------------------------------------PhysicalProject 
+------------------------------------------------filter((ss1.ss_store_sk = 146)) +--------------------------------------------------PhysicalOlapScan[store_sales] +--------------------------------------PhysicalProject +----------------------------------------PhysicalAssertNumRows +------------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------------PhysicalProject +----------------------------------------------hashAgg[GLOBAL] +------------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------------hashAgg[LOCAL] +----------------------------------------------------PhysicalProject +------------------------------------------------------filter((store_sales.ss_store_sk = 146) and ss_addr_sk IS NULL) +--------------------------------------------------------PhysicalOlapScan[store_sales] ----------------PhysicalProject -------------------PhysicalOlapScan[item] apply RFs: RF0 -----------------PhysicalProject -------------------filter((rnk < 11)) ---------------------PhysicalWindow -----------------------PhysicalQuickSort[MERGE_SORT] -------------------------PhysicalDistribute[DistributionSpecGather] ---------------------------PhysicalQuickSort[LOCAL_SORT] -----------------------------PhysicalPartitionTopN -------------------------------PhysicalProject ---------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) -----------------------------------PhysicalProject -------------------------------------hashAgg[GLOBAL] ---------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------hashAgg[LOCAL] -------------------------------------------PhysicalProject ---------------------------------------------filter((ss1.ss_store_sk = 146)) -----------------------------------------------PhysicalOlapScan[store_sales] 
+------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((i2.i_item_sk = descending.item_sk)) otherCondition=() build RFs:RF0 item_sk->[i_item_sk] +--------------------PhysicalProject +----------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(i2.i_product_name)] apply RFs: RF0 +--------------------PhysicalProject +----------------------filter((rnk < 11)) +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[MERGE_SORT] +----------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------PhysicalQuickSort[LOCAL_SORT] +--------------------------------PhysicalPartitionTopN ----------------------------------PhysicalProject -------------------------------------PhysicalAssertNumRows ---------------------------------------PhysicalDistribute[DistributionSpecGather] -----------------------------------------PhysicalProject -------------------------------------------hashAgg[GLOBAL] ---------------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------------hashAgg[LOCAL] -------------------------------------------------PhysicalProject ---------------------------------------------------filter((store_sales.ss_store_sk = 146) and ss_addr_sk IS NULL) -----------------------------------------------------PhysicalOlapScan[store_sales] +------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) +--------------------------------------PhysicalProject +----------------------------------------hashAgg[GLOBAL] +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashAgg[LOCAL] +----------------------------------------------PhysicalProject +------------------------------------------------filter((ss1.ss_store_sk = 146)) +--------------------------------------------------PhysicalOlapScan[store_sales] 
+--------------------------------------PhysicalProject +----------------------------------------PhysicalAssertNumRows +------------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------------PhysicalProject +----------------------------------------------hashAgg[GLOBAL] +------------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------------hashAgg[LOCAL] +----------------------------------------------------PhysicalProject +------------------------------------------------------filter((store_sales.ss_store_sk = 146) and ss_addr_sk IS NULL) +--------------------------------------------------------PhysicalOlapScan[store_sales] diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query65.out b/regression-test/data/shape_check/tpcds_sf100/shape/query65.out index f45946144fc142..d79e026da9ce01 100644 --- a/regression-test/data/shape_check/tpcds_sf100/shape/query65.out +++ b/regression-test/data/shape_check/tpcds_sf100/shape/query65.out @@ -1,41 +1,43 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ds_shape_65 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN broadcast] hashCondition=((sb.ss_store_sk = sc.ss_store_sk)) otherCondition=((cast(revenue as DOUBLE) <= cast((0.1 * ave) as DOUBLE))) build RFs:RF4 ss_store_sk->[s_store_sk,ss_store_sk] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(store.s_store_name,item.i_item_desc,sc.revenue) lazySlots:(item.i_brand,item.i_current_price,item.i_wholesale_cost)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = sc.ss_store_sk)) otherCondition=() build RFs:RF3 s_store_sk->[ss_store_sk] +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((sb.ss_store_sk = sc.ss_store_sk)) otherCondition=((cast(revenue as DOUBLE) <= cast((0.1 * ave) as DOUBLE))) build RFs:RF4 ss_store_sk->[s_store_sk,ss_store_sk] ----------------PhysicalProject -------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = sc.ss_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 RF4 -------------------------------PhysicalProject ---------------------------------filter((date_dim.d_month_seq <= 1232) and (date_dim.d_month_seq >= 
1221)) -----------------------------------PhysicalOlapScan[date_dim] +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = sc.ss_store_sk)) otherCondition=() build RFs:RF3 s_store_sk->[ss_store_sk] --------------------PhysicalProject -----------------------PhysicalOlapScan[item] -----------------PhysicalProject -------------------PhysicalOlapScan[store] apply RFs: RF4 -------------hashAgg[GLOBAL] ---------------PhysicalDistribute[DistributionSpecHash] -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = sc.ss_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 RF4 +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_month_seq <= 1232) and (date_dim.d_month_seq >= 1221)) +--------------------------------------PhysicalOlapScan[date_dim] +------------------------PhysicalProject 
+--------------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(item.i_current_price,item.i_wholesale_cost,item.i_brand)] +--------------------PhysicalProject +----------------------PhysicalOlapScan[store] apply RFs: RF4 +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] ------------------------------PhysicalProject ---------------------------------filter((date_dim.d_month_seq <= 1232) and (date_dim.d_month_seq >= 1221)) -----------------------------------PhysicalOlapScan[date_dim] +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_month_seq <= 1232) and (date_dim.d_month_seq >= 1221)) +--------------------------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query68.out b/regression-test/data/shape_check/tpcds_sf100/shape/query68.out index aa07d1b2a42d9b..f93e4120b2382a 100644 --- a/regression-test/data/shape_check/tpcds_sf100/shape/query68.out +++ b/regression-test/data/shape_check/tpcds_sf100/shape/query68.out @@ -1,38 +1,40 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ds_shape_68 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_current_addr_sk = current_addr.ca_address_sk)) otherCondition=(( not (ca_city = bought_city))) build RFs:RF5 c_current_addr_sk->[ca_address_sk] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(customer.c_last_name,current_addr.ca_city,dn.bought_city,dn.ss_ticket_number,dn.extended_price,dn.extended_tax,dn.list_price) lazySlots:(customer.c_first_name)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------PhysicalOlapScan[customer_address] apply RFs: RF5 -------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 ss_customer_sk->[c_customer_sk] +--------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_current_addr_sk = current_addr.ca_address_sk)) otherCondition=(( not (ca_city = bought_city))) build RFs:RF5 c_current_addr_sk->[ca_address_sk] ----------------PhysicalProject -------------------PhysicalOlapScan[customer] apply RFs: RF4 +------------------PhysicalOlapScan[customer_address] apply RFs: RF5 ----------------PhysicalProject -------------------hashAgg[LOCAL] +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 ss_customer_sk->[c_customer_sk] --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ss_addr_sk->[ca_address_sk] -------------------------PhysicalProject 
---------------------------PhysicalOlapScan[customer_address] apply RFs: RF3 +----------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_first_name)] apply RFs: RF4 +--------------------PhysicalProject +----------------------hashAgg[LOCAL] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] +--------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ss_addr_sk->[ca_address_sk] ----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +------------------------------PhysicalOlapScan[customer_address] apply RFs: RF3 +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] --------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] ------------------------------------PhysicalProject ---------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 
d_date_sk->[ss_sold_date_sk] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +----------------------------------------PhysicalProject +------------------------------------------filter((date_dim.d_dom <= 2) and (date_dim.d_dom >= 1) and d_year IN (1998, 1999, 2000)) +--------------------------------------------PhysicalOlapScan[date_dim] ------------------------------------PhysicalProject ---------------------------------------filter((date_dim.d_dom <= 2) and (date_dim.d_dom >= 1) and d_year IN (1998, 1999, 2000)) -----------------------------------------PhysicalOlapScan[date_dim] +--------------------------------------filter(s_city IN ('Five Points', 'Pleasant Hill')) +----------------------------------------PhysicalOlapScan[store] --------------------------------PhysicalProject -----------------------------------filter(s_city IN ('Five Points', 'Pleasant Hill')) -------------------------------------PhysicalOlapScan[store] -----------------------------PhysicalProject -------------------------------filter(OR[(household_demographics.hd_dep_count = 8),(household_demographics.hd_vehicle_count = -1)]) ---------------------------------PhysicalOlapScan[household_demographics] +----------------------------------filter(OR[(household_demographics.hd_dep_count = 8),(household_demographics.hd_vehicle_count = -1)]) +------------------------------------PhysicalOlapScan[household_demographics] diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query81.out b/regression-test/data/shape_check/tpcds_sf100/shape/query81.out index 08be90894dded4..fd4c79be05ba48 100644 --- a/regression-test/data/shape_check/tpcds_sf100/shape/query81.out +++ b/regression-test/data/shape_check/tpcds_sf100/shape/query81.out @@ -18,24 +18,26 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------PhysicalProject ------------------PhysicalOlapScan[customer_address] --PhysicalResultSink 
-----PhysicalTopN[MERGE_SORT] -------PhysicalDistribute[DistributionSpecGather] ---------PhysicalTopN[LOCAL_SORT] -----------PhysicalProject -------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) build RFs:RF4 ctr_state->[ctr_state] +----PhysicalProject +------PhysicalLazyMaterialize[materializedSlots:(customer.c_customer_id,customer_address.ca_street_number,customer_address.ca_street_name,customer_address.ca_street_type,customer_address.ca_suite_number,customer_address.ca_city,customer_address.ca_county,customer_address.ca_state,customer_address.ca_zip,customer_address.ca_country,customer_address.ca_gmt_offset,customer_address.ca_location_type,ctr1.ctr_total_return) lazySlots:(customer.c_first_name,customer.c_last_name,customer.c_salutation)] +--------PhysicalTopN[MERGE_SORT] +----------PhysicalDistribute[DistributionSpecGather] +------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) build RFs:RF4 ctr_state->[ctr_state] ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[ctr_customer_sk] -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF2 RF4 +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 
ca_address_sk->[c_current_addr_sk] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] apply RFs: RF3 -------------------PhysicalProject ---------------------filter((customer_address.ca_state = 'CA')) -----------------------PhysicalOlapScan[customer_address] ---------------hashAgg[GLOBAL] -----------------PhysicalDistribute[DistributionSpecHash] -------------------hashAgg[LOCAL] ---------------------PhysicalDistribute[DistributionSpecExecutionAny] -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[ctr_customer_sk] +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF2 RF4 +--------------------------PhysicalProject +----------------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_last_name,customer.c_salutation,customer.c_first_name)] apply RFs: RF3 +----------------------PhysicalProject +------------------------filter((customer_address.ca_state = 'CA')) +--------------------------PhysicalOlapScan[customer_address] +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalDistribute[DistributionSpecExecutionAny] +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/shape_check/tpcds_sf1000/bs_downgrade_shape/query44.out b/regression-test/data/shape_check/tpcds_sf1000/bs_downgrade_shape/query44.out index 5c302c265fc9f3..95d56b78edc13e 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/bs_downgrade_shape/query44.out +++ b/regression-test/data/shape_check/tpcds_sf1000/bs_downgrade_shape/query44.out @@ -1,69 +1,71 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ds_shape_44 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN broadcast] hashCondition=((asceding.rnk = descending.rnk)) otherCondition=() +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(asceding.rnk) lazySlots:(best_performing,worst_performing)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((i1.i_item_sk = asceding.item_sk)) otherCondition=() build RFs:RF1 item_sk->[i_item_sk] +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((asceding.rnk = descending.rnk)) otherCondition=() ----------------PhysicalProject -------------------PhysicalOlapScan[item] apply RFs: RF1 -----------------PhysicalProject -------------------filter((rnk < 11)) ---------------------PhysicalWindow -----------------------PhysicalQuickSort[MERGE_SORT] -------------------------PhysicalDistribute[DistributionSpecGather] ---------------------------PhysicalQuickSort[LOCAL_SORT] -----------------------------PhysicalPartitionTopN -------------------------------PhysicalProject ---------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) -----------------------------------PhysicalProject -------------------------------------hashAgg[GLOBAL] ---------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------hashAgg[LOCAL] -------------------------------------------PhysicalProject ---------------------------------------------filter((ss1.ss_store_sk = 4)) -----------------------------------------------PhysicalOlapScan[store_sales] +------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((i1.i_item_sk 
= asceding.item_sk)) otherCondition=() build RFs:RF1 item_sk->[i_item_sk] +--------------------PhysicalProject +----------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(i1.i_product_name)] apply RFs: RF1 +--------------------PhysicalProject +----------------------filter((rnk < 11)) +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[MERGE_SORT] +----------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------PhysicalQuickSort[LOCAL_SORT] +--------------------------------PhysicalPartitionTopN ----------------------------------PhysicalProject -------------------------------------PhysicalAssertNumRows ---------------------------------------PhysicalDistribute[DistributionSpecGather] -----------------------------------------PhysicalProject -------------------------------------------hashAgg[GLOBAL] ---------------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------------hashAgg[LOCAL] -------------------------------------------------PhysicalProject ---------------------------------------------------filter((store_sales.ss_store_sk = 4) and ss_hdemo_sk IS NULL) -----------------------------------------------------PhysicalOlapScan[store_sales] -------------PhysicalProject ---------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((i2.i_item_sk = descending.item_sk)) otherCondition=() build RFs:RF0 item_sk->[i_item_sk] +------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) +--------------------------------------PhysicalProject +----------------------------------------hashAgg[GLOBAL] +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashAgg[LOCAL] +----------------------------------------------PhysicalProject +------------------------------------------------filter((ss1.ss_store_sk = 
4)) +--------------------------------------------------PhysicalOlapScan[store_sales] +--------------------------------------PhysicalProject +----------------------------------------PhysicalAssertNumRows +------------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------------PhysicalProject +----------------------------------------------hashAgg[GLOBAL] +------------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------------hashAgg[LOCAL] +----------------------------------------------------PhysicalProject +------------------------------------------------------filter((store_sales.ss_store_sk = 4) and ss_hdemo_sk IS NULL) +--------------------------------------------------------PhysicalOlapScan[store_sales] ----------------PhysicalProject -------------------PhysicalOlapScan[item] apply RFs: RF0 -----------------PhysicalProject -------------------filter((rnk < 11)) ---------------------PhysicalWindow -----------------------PhysicalQuickSort[MERGE_SORT] -------------------------PhysicalDistribute[DistributionSpecGather] ---------------------------PhysicalQuickSort[LOCAL_SORT] -----------------------------PhysicalPartitionTopN -------------------------------PhysicalProject ---------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) -----------------------------------PhysicalProject -------------------------------------hashAgg[GLOBAL] ---------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------hashAgg[LOCAL] -------------------------------------------PhysicalProject ---------------------------------------------filter((ss1.ss_store_sk = 4)) -----------------------------------------------PhysicalOlapScan[store_sales] +------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((i2.i_item_sk = descending.item_sk)) 
otherCondition=() build RFs:RF0 item_sk->[i_item_sk] +--------------------PhysicalProject +----------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(i2.i_product_name)] apply RFs: RF0 +--------------------PhysicalProject +----------------------filter((rnk < 11)) +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[MERGE_SORT] +----------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------PhysicalQuickSort[LOCAL_SORT] +--------------------------------PhysicalPartitionTopN ----------------------------------PhysicalProject -------------------------------------PhysicalAssertNumRows ---------------------------------------PhysicalDistribute[DistributionSpecGather] -----------------------------------------PhysicalProject -------------------------------------------hashAgg[GLOBAL] ---------------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------------hashAgg[LOCAL] -------------------------------------------------PhysicalProject ---------------------------------------------------filter((store_sales.ss_store_sk = 4) and ss_hdemo_sk IS NULL) -----------------------------------------------------PhysicalOlapScan[store_sales] +------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) +--------------------------------------PhysicalProject +----------------------------------------hashAgg[GLOBAL] +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashAgg[LOCAL] +----------------------------------------------PhysicalProject +------------------------------------------------filter((ss1.ss_store_sk = 4)) +--------------------------------------------------PhysicalOlapScan[store_sales] +--------------------------------------PhysicalProject +----------------------------------------PhysicalAssertNumRows 
+------------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------------PhysicalProject +----------------------------------------------hashAgg[GLOBAL] +------------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------------hashAgg[LOCAL] +----------------------------------------------------PhysicalProject +------------------------------------------------------filter((store_sales.ss_store_sk = 4) and ss_hdemo_sk IS NULL) +--------------------------------------------------------PhysicalOlapScan[store_sales] diff --git a/regression-test/data/shape_check/tpcds_sf1000/bs_downgrade_shape/query68.out b/regression-test/data/shape_check/tpcds_sf1000/bs_downgrade_shape/query68.out index 2f4fbe401f1315..bcbc6dfb42e77e 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/bs_downgrade_shape/query68.out +++ b/regression-test/data/shape_check/tpcds_sf1000/bs_downgrade_shape/query68.out @@ -1,38 +1,40 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ds_shape_68 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_current_addr_sk = current_addr.ca_address_sk)) otherCondition=(( not (ca_city = bought_city))) build RFs:RF5 c_current_addr_sk->[ca_address_sk] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(customer.c_last_name,current_addr.ca_city,dn.bought_city,dn.ss_ticket_number,dn.extended_price,dn.extended_tax,dn.list_price) lazySlots:(customer.c_first_name)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------PhysicalOlapScan[customer_address] apply RFs: RF5 -------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 ss_customer_sk->[c_customer_sk] +--------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_current_addr_sk = current_addr.ca_address_sk)) otherCondition=(( not (ca_city = bought_city))) build RFs:RF5 c_current_addr_sk->[ca_address_sk] ----------------PhysicalProject -------------------PhysicalOlapScan[customer] apply RFs: RF4 +------------------PhysicalOlapScan[customer_address] apply RFs: RF5 ----------------PhysicalProject -------------------hashAgg[LOCAL] +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 ss_customer_sk->[c_customer_sk] --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ss_addr_sk->[ca_address_sk] -------------------------PhysicalProject 
---------------------------PhysicalOlapScan[customer_address] apply RFs: RF3 +----------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_first_name)] apply RFs: RF4 +--------------------PhysicalProject +----------------------hashAgg[LOCAL] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] +--------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ss_addr_sk->[ca_address_sk] ----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +------------------------------PhysicalOlapScan[customer_address] apply RFs: RF3 +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] --------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] ------------------------------------PhysicalProject ---------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 
d_date_sk->[ss_sold_date_sk] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +----------------------------------------PhysicalProject +------------------------------------------filter((date_dim.d_dom <= 2) and (date_dim.d_dom >= 1) and d_year IN (1998, 1999, 2000)) +--------------------------------------------PhysicalOlapScan[date_dim] ------------------------------------PhysicalProject ---------------------------------------filter((date_dim.d_dom <= 2) and (date_dim.d_dom >= 1) and d_year IN (1998, 1999, 2000)) -----------------------------------------PhysicalOlapScan[date_dim] +--------------------------------------filter(s_city IN ('Fairview', 'Midway')) +----------------------------------------PhysicalOlapScan[store] --------------------------------PhysicalProject -----------------------------------filter(s_city IN ('Fairview', 'Midway')) -------------------------------------PhysicalOlapScan[store] -----------------------------PhysicalProject -------------------------------filter(OR[(household_demographics.hd_dep_count = 3),(household_demographics.hd_vehicle_count = 4)]) ---------------------------------PhysicalOlapScan[household_demographics] +----------------------------------filter(OR[(household_demographics.hd_dep_count = 3),(household_demographics.hd_vehicle_count = 4)]) +------------------------------------PhysicalOlapScan[household_demographics] diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query30.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query30.out index f1da603468bba0..89bdde1385d235 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/hint/query30.out +++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query30.out @@ -18,26 +18,28 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------PhysicalProject ------------------PhysicalOlapScan[customer_address] --PhysicalResultSink -----PhysicalTopN[MERGE_SORT] 
-------PhysicalDistribute[DistributionSpecGather] ---------PhysicalTopN[LOCAL_SORT] -----------PhysicalProject -------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) build RFs:RF4 ctr_state->[ctr_state] +----PhysicalProject +------PhysicalLazyMaterialize[materializedSlots:(customer.c_customer_id,ctr1.ctr_total_return) lazySlots:(customer.c_birth_country,customer.c_birth_day,customer.c_birth_month,customer.c_birth_year,customer.c_email_address,customer.c_first_name,customer.c_last_name,customer.c_last_review_date_sk,customer.c_login,customer.c_preferred_cust_flag,customer.c_salutation)] +--------PhysicalTopN[MERGE_SORT] +----------PhysicalDistribute[DistributionSpecGather] +------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[ctr_customer_sk] -------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3 RF4 +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) build RFs:RF4 ctr_state->[ctr_state] ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF2 ca_address_sk->[c_current_addr_sk] -----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] apply RFs: RF2 +--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[ctr_customer_sk] +----------------------PhysicalCteConsumer ( 
cteId=CTEId#0 ) apply RFs: RF3 RF4 ----------------------PhysicalProject -------------------------filter((customer_address.ca_state = 'AR')) ---------------------------PhysicalOlapScan[customer_address] ---------------hashAgg[GLOBAL] -----------------PhysicalDistribute[DistributionSpecHash] -------------------hashAgg[LOCAL] ---------------------PhysicalDistribute[DistributionSpecExecutionAny] -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF2 ca_address_sk->[c_current_addr_sk] +--------------------------PhysicalProject +----------------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_birth_month,customer.c_birth_year,customer.c_birth_country,customer.c_login,customer.c_email_address,customer.c_last_review_date_sk,customer.c_salutation,customer.c_first_name,customer.c_last_name,customer.c_preferred_cust_flag,customer.c_birth_day)] apply RFs: RF2 +--------------------------PhysicalProject +----------------------------filter((customer_address.ca_state = 'AR')) +------------------------------PhysicalOlapScan[customer_address] +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalDistribute[DistributionSpecExecutionAny] +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) Hint log: Used: leading(web_returns date_dim customer_address ) leading(ctr1 { customer customer_address } ) diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query44.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query44.out index 013c93d6ea7d8c..e928b42427a5cd 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/hint/query44.out +++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query44.out @@ -1,71 +1,73 @@ -- This file is automatically 
generated. You should know what you did if you want to edit this -- !ds_shape_44 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN broadcast] hashCondition=((asceding.rnk = descending.rnk)) otherCondition=() +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(asceding.rnk) lazySlots:(best_performing,worst_performing)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((i1.i_item_sk = asceding.item_sk)) otherCondition=() build RFs:RF1 item_sk->[i_item_sk] +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((asceding.rnk = descending.rnk)) otherCondition=() ----------------PhysicalProject -------------------PhysicalOlapScan[item] apply RFs: RF1 -----------------PhysicalProject -------------------filter((rnk < 11)) ---------------------PhysicalWindow -----------------------PhysicalQuickSort[MERGE_SORT] -------------------------PhysicalDistribute[DistributionSpecGather] ---------------------------PhysicalQuickSort[LOCAL_SORT] -----------------------------PhysicalPartitionTopN -------------------------------PhysicalProject ---------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) -----------------------------------PhysicalProject -------------------------------------hashAgg[GLOBAL] ---------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------hashAgg[LOCAL] -------------------------------------------PhysicalProject ---------------------------------------------filter((ss1.ss_store_sk = 4)) -----------------------------------------------PhysicalOlapScan[store_sales] +------------------hashJoin[INNER_JOIN bucketShuffle] 
hashCondition=((i1.i_item_sk = asceding.item_sk)) otherCondition=() build RFs:RF1 item_sk->[i_item_sk] +--------------------PhysicalProject +----------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(i1.i_product_name)] apply RFs: RF1 +--------------------PhysicalProject +----------------------filter((rnk < 11)) +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[MERGE_SORT] +----------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------PhysicalQuickSort[LOCAL_SORT] +--------------------------------PhysicalPartitionTopN ----------------------------------PhysicalProject -------------------------------------PhysicalAssertNumRows ---------------------------------------PhysicalDistribute[DistributionSpecGather] -----------------------------------------PhysicalProject -------------------------------------------hashAgg[GLOBAL] ---------------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------------hashAgg[LOCAL] -------------------------------------------------PhysicalProject ---------------------------------------------------filter((store_sales.ss_store_sk = 4) and ss_hdemo_sk IS NULL) -----------------------------------------------------PhysicalOlapScan[store_sales] -------------PhysicalProject ---------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((i2.i_item_sk = descending.item_sk)) otherCondition=() build RFs:RF0 item_sk->[i_item_sk] +------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) +--------------------------------------PhysicalProject +----------------------------------------hashAgg[GLOBAL] +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashAgg[LOCAL] +----------------------------------------------PhysicalProject 
+------------------------------------------------filter((ss1.ss_store_sk = 4)) +--------------------------------------------------PhysicalOlapScan[store_sales] +--------------------------------------PhysicalProject +----------------------------------------PhysicalAssertNumRows +------------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------------PhysicalProject +----------------------------------------------hashAgg[GLOBAL] +------------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------------hashAgg[LOCAL] +----------------------------------------------------PhysicalProject +------------------------------------------------------filter((store_sales.ss_store_sk = 4) and ss_hdemo_sk IS NULL) +--------------------------------------------------------PhysicalOlapScan[store_sales] ----------------PhysicalProject -------------------PhysicalOlapScan[item] apply RFs: RF0 -----------------PhysicalProject -------------------filter((rnk < 11)) ---------------------PhysicalWindow -----------------------PhysicalQuickSort[MERGE_SORT] -------------------------PhysicalDistribute[DistributionSpecGather] ---------------------------PhysicalQuickSort[LOCAL_SORT] -----------------------------PhysicalPartitionTopN -------------------------------PhysicalProject ---------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) -----------------------------------PhysicalProject -------------------------------------hashAgg[GLOBAL] ---------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------hashAgg[LOCAL] -------------------------------------------PhysicalProject ---------------------------------------------filter((ss1.ss_store_sk = 4)) -----------------------------------------------PhysicalOlapScan[store_sales] +------------------hashJoin[INNER_JOIN 
bucketShuffle] hashCondition=((i2.i_item_sk = descending.item_sk)) otherCondition=() build RFs:RF0 item_sk->[i_item_sk] +--------------------PhysicalProject +----------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(i2.i_product_name)] apply RFs: RF0 +--------------------PhysicalProject +----------------------filter((rnk < 11)) +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[MERGE_SORT] +----------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------PhysicalQuickSort[LOCAL_SORT] +--------------------------------PhysicalPartitionTopN ----------------------------------PhysicalProject -------------------------------------PhysicalAssertNumRows ---------------------------------------PhysicalDistribute[DistributionSpecGather] -----------------------------------------PhysicalProject -------------------------------------------hashAgg[GLOBAL] ---------------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------------hashAgg[LOCAL] -------------------------------------------------PhysicalProject ---------------------------------------------------filter((store_sales.ss_store_sk = 4) and ss_hdemo_sk IS NULL) -----------------------------------------------------PhysicalOlapScan[store_sales] +------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) +--------------------------------------PhysicalProject +----------------------------------------hashAgg[GLOBAL] +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashAgg[LOCAL] +----------------------------------------------PhysicalProject +------------------------------------------------filter((ss1.ss_store_sk = 4)) +--------------------------------------------------PhysicalOlapScan[store_sales] 
+--------------------------------------PhysicalProject +----------------------------------------PhysicalAssertNumRows +------------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------------PhysicalProject +----------------------------------------------hashAgg[GLOBAL] +------------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------------hashAgg[LOCAL] +----------------------------------------------------PhysicalProject +------------------------------------------------------filter((store_sales.ss_store_sk = 4) and ss_hdemo_sk IS NULL) +--------------------------------------------------------PhysicalOlapScan[store_sales] Hint log: Used: leading(i1 asceding { i2 descending } ) diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query65.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query65.out index 2c5c376dfd50b2..e1ef06b03db820 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/hint/query65.out +++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query65.out @@ -1,44 +1,46 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ds_shape_65 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN broadcast] hashCondition=((sb.ss_store_sk = sc.ss_store_sk)) otherCondition=((cast(revenue as DOUBLE) <= cast((0.1 * ave) as DOUBLE))) build RFs:RF4 ss_store_sk->[s_store_sk,ss_store_sk] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(store.s_store_name,item.i_item_desc,sc.revenue) lazySlots:(item.i_brand,item.i_current_price,item.i_wholesale_cost)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = sc.ss_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ss_item_sk] +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((sb.ss_store_sk = sc.ss_store_sk)) otherCondition=((cast(revenue as DOUBLE) <= cast((0.1 * ave) as DOUBLE))) build RFs:RF4 ss_store_sk->[s_store_sk,ss_store_sk] ----------------PhysicalProject -------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = sc.ss_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = sc.ss_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ss_item_sk] --------------------PhysicalProject -----------------------hashAgg[GLOBAL] -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------hashAgg[LOCAL] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk 
= sc.ss_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +------------------------PhysicalProject +--------------------------hashAgg[GLOBAL] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------hashAgg[LOCAL] --------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 RF4 ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_month_seq <= 1187) and (date_dim.d_month_seq >= 1176)) -------------------------------------PhysicalOlapScan[date_dim] +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 RF4 +------------------------------------PhysicalProject +--------------------------------------filter((date_dim.d_month_seq <= 1187) and (date_dim.d_month_seq >= 1176)) +----------------------------------------PhysicalOlapScan[date_dim] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[store] apply RFs: RF4 --------------------PhysicalProject -----------------------PhysicalOlapScan[store] apply RFs: RF4 -----------------PhysicalProject -------------------PhysicalOlapScan[item] -------------hashAgg[GLOBAL] ---------------PhysicalDistribute[DistributionSpecHash] -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 
d_date_sk->[ss_sold_date_sk] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +----------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(item.i_current_price,item.i_wholesale_cost,item.i_brand)] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] ------------------------------PhysicalProject ---------------------------------filter((date_dim.d_month_seq <= 1187) and (date_dim.d_month_seq >= 1176)) -----------------------------------PhysicalOlapScan[date_dim] +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_month_seq <= 1187) and (date_dim.d_month_seq >= 1176)) +--------------------------------------PhysicalOlapScan[date_dim] Hint log: Used: leading(store_sales date_dim ) diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query68.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query68.out index 9e66e56512ebc7..8a4c6b6f818847 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/hint/query68.out +++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query68.out @@ -1,40 +1,42 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ds_shape_68 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_current_addr_sk = current_addr.ca_address_sk)) otherCondition=(( not (ca_city = bought_city))) build RFs:RF5 c_current_addr_sk->[ca_address_sk] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(customer.c_last_name,current_addr.ca_city,dn.bought_city,dn.ss_ticket_number,dn.extended_price,dn.extended_tax,dn.list_price) lazySlots:(customer.c_first_name)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------PhysicalOlapScan[customer_address] apply RFs: RF5 -------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 ss_customer_sk->[c_customer_sk] +--------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_current_addr_sk = current_addr.ca_address_sk)) otherCondition=(( not (ca_city = bought_city))) build RFs:RF5 c_current_addr_sk->[ca_address_sk] ----------------PhysicalProject -------------------PhysicalOlapScan[customer] apply RFs: RF4 +------------------PhysicalOlapScan[customer_address] apply RFs: RF5 ----------------PhysicalProject -------------------hashAgg[LOCAL] +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 ss_customer_sk->[c_customer_sk] --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ss_addr_sk->[ca_address_sk] -------------------------PhysicalProject 
---------------------------PhysicalOlapScan[customer_address] apply RFs: RF3 +----------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_first_name)] apply RFs: RF4 +--------------------PhysicalProject +----------------------hashAgg[LOCAL] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] +--------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ss_addr_sk->[ca_address_sk] ----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +------------------------------PhysicalOlapScan[customer_address] apply RFs: RF3 +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] --------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] ------------------------------------PhysicalProject ---------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 
d_date_sk->[ss_sold_date_sk] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +----------------------------------------PhysicalProject +------------------------------------------filter((date_dim.d_dom <= 2) and (date_dim.d_dom >= 1) and d_year IN (1998, 1999, 2000)) +--------------------------------------------PhysicalOlapScan[date_dim] ------------------------------------PhysicalProject ---------------------------------------filter((date_dim.d_dom <= 2) and (date_dim.d_dom >= 1) and d_year IN (1998, 1999, 2000)) -----------------------------------------PhysicalOlapScan[date_dim] +--------------------------------------filter(s_city IN ('Fairview', 'Midway')) +----------------------------------------PhysicalOlapScan[store] --------------------------------PhysicalProject -----------------------------------filter(s_city IN ('Fairview', 'Midway')) -------------------------------------PhysicalOlapScan[store] -----------------------------PhysicalProject -------------------------------filter(OR[(household_demographics.hd_dep_count = 3),(household_demographics.hd_vehicle_count = 4)]) ---------------------------------PhysicalOlapScan[household_demographics] +----------------------------------filter(OR[(household_demographics.hd_dep_count = 3),(household_demographics.hd_vehicle_count = 4)]) +------------------------------------PhysicalOlapScan[household_demographics] Hint log: Used: leading(customer_address { store_sales date_dim store household_demographics } ) leading(current_addr { customer dn } ) diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query81.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query81.out index fcbe4a8ad57c34..f2291082b7f799 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/hint/query81.out +++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query81.out @@ -18,26 +18,28 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) 
----------------PhysicalProject ------------------PhysicalOlapScan[customer_address] --PhysicalResultSink -----PhysicalTopN[MERGE_SORT] -------PhysicalDistribute[DistributionSpecGather] ---------PhysicalTopN[LOCAL_SORT] -----------PhysicalProject -------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) build RFs:RF4 ctr_state->[ctr_state] +----PhysicalProject +------PhysicalLazyMaterialize[materializedSlots:(customer.c_customer_id,customer_address.ca_street_number,customer_address.ca_street_name,customer_address.ca_street_type,customer_address.ca_suite_number,customer_address.ca_city,customer_address.ca_county,customer_address.ca_state,customer_address.ca_zip,customer_address.ca_country,customer_address.ca_gmt_offset,customer_address.ca_location_type,ctr1.ctr_total_return) lazySlots:(customer.c_first_name,customer.c_last_name,customer.c_salutation)] +--------PhysicalTopN[MERGE_SORT] +----------PhysicalDistribute[DistributionSpecGather] +------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 ctr_customer_sk->[c_customer_sk] +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) build RFs:RF4 ctr_state->[ctr_state] ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF2 ca_address_sk->[c_current_addr_sk] -----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] apply RFs: RF2 RF3 +--------------------hashJoin[INNER_JOIN 
shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 ctr_customer_sk->[c_customer_sk] ----------------------PhysicalProject -------------------------filter((customer_address.ca_state = 'TN')) ---------------------------PhysicalOlapScan[customer_address] -------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4 ---------------hashAgg[GLOBAL] -----------------PhysicalDistribute[DistributionSpecHash] -------------------hashAgg[LOCAL] ---------------------PhysicalDistribute[DistributionSpecExecutionAny] -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF2 ca_address_sk->[c_current_addr_sk] +--------------------------PhysicalProject +----------------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_last_name,customer.c_salutation,customer.c_first_name)] apply RFs: RF2 RF3 +--------------------------PhysicalProject +----------------------------filter((customer_address.ca_state = 'TN')) +------------------------------PhysicalOlapScan[customer_address] +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4 +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalDistribute[DistributionSpecExecutionAny] +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) Hint log: Used: leading(catalog_returns date_dim customer_address ) leading(customer customer_address ctr1 ) diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query30.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query30.out index 1b157170e9f628..30b8b3b0ba6597 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query30.out +++ 
b/regression-test/data/shape_check/tpcds_sf1000/shape/query30.out @@ -18,24 +18,26 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------PhysicalProject ------------------PhysicalOlapScan[customer_address] --PhysicalResultSink -----PhysicalTopN[MERGE_SORT] -------PhysicalDistribute[DistributionSpecGather] ---------PhysicalTopN[LOCAL_SORT] -----------PhysicalProject -------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) build RFs:RF4 ctr_state->[ctr_state] +----PhysicalProject +------PhysicalLazyMaterialize[materializedSlots:(customer.c_customer_id,ctr1.ctr_total_return) lazySlots:(customer.c_birth_country,customer.c_birth_day,customer.c_birth_month,customer.c_birth_year,customer.c_email_address,customer.c_first_name,customer.c_last_name,customer.c_last_review_date_sk,customer.c_login,customer.c_preferred_cust_flag,customer.c_salutation)] +--------PhysicalTopN[MERGE_SORT] +----------PhysicalDistribute[DistributionSpecGather] +------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) build RFs:RF4 ctr_state->[ctr_state] ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 ctr_customer_sk->[c_customer_sk] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build 
RFs:RF3 ca_address_sk->[c_current_addr_sk] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] apply RFs: RF2 RF3 -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4 -------------------PhysicalProject ---------------------filter((customer_address.ca_state = 'AR')) -----------------------PhysicalOlapScan[customer_address] ---------------hashAgg[GLOBAL] -----------------PhysicalDistribute[DistributionSpecHash] -------------------hashAgg[LOCAL] ---------------------PhysicalDistribute[DistributionSpecExecutionAny] -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 ctr_customer_sk->[c_customer_sk] +--------------------------PhysicalProject +----------------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_birth_month,customer.c_birth_year,customer.c_birth_country,customer.c_login,customer.c_email_address,customer.c_last_review_date_sk,customer.c_salutation,customer.c_first_name,customer.c_last_name,customer.c_preferred_cust_flag,customer.c_birth_day)] apply RFs: RF2 RF3 +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4 +----------------------PhysicalProject +------------------------filter((customer_address.ca_state = 'AR')) +--------------------------PhysicalOlapScan[customer_address] +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalDistribute[DistributionSpecExecutionAny] +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query44.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query44.out index 5c302c265fc9f3..95d56b78edc13e 100644 --- 
a/regression-test/data/shape_check/tpcds_sf1000/shape/query44.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query44.out @@ -1,69 +1,71 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !ds_shape_44 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN broadcast] hashCondition=((asceding.rnk = descending.rnk)) otherCondition=() +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(asceding.rnk) lazySlots:(best_performing,worst_performing)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((i1.i_item_sk = asceding.item_sk)) otherCondition=() build RFs:RF1 item_sk->[i_item_sk] +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((asceding.rnk = descending.rnk)) otherCondition=() ----------------PhysicalProject -------------------PhysicalOlapScan[item] apply RFs: RF1 -----------------PhysicalProject -------------------filter((rnk < 11)) ---------------------PhysicalWindow -----------------------PhysicalQuickSort[MERGE_SORT] -------------------------PhysicalDistribute[DistributionSpecGather] ---------------------------PhysicalQuickSort[LOCAL_SORT] -----------------------------PhysicalPartitionTopN -------------------------------PhysicalProject ---------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) -----------------------------------PhysicalProject -------------------------------------hashAgg[GLOBAL] ---------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------hashAgg[LOCAL] -------------------------------------------PhysicalProject 
---------------------------------------------filter((ss1.ss_store_sk = 4)) -----------------------------------------------PhysicalOlapScan[store_sales] +------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((i1.i_item_sk = asceding.item_sk)) otherCondition=() build RFs:RF1 item_sk->[i_item_sk] +--------------------PhysicalProject +----------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(i1.i_product_name)] apply RFs: RF1 +--------------------PhysicalProject +----------------------filter((rnk < 11)) +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[MERGE_SORT] +----------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------PhysicalQuickSort[LOCAL_SORT] +--------------------------------PhysicalPartitionTopN ----------------------------------PhysicalProject -------------------------------------PhysicalAssertNumRows ---------------------------------------PhysicalDistribute[DistributionSpecGather] -----------------------------------------PhysicalProject -------------------------------------------hashAgg[GLOBAL] ---------------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------------hashAgg[LOCAL] -------------------------------------------------PhysicalProject ---------------------------------------------------filter((store_sales.ss_store_sk = 4) and ss_hdemo_sk IS NULL) -----------------------------------------------------PhysicalOlapScan[store_sales] -------------PhysicalProject ---------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((i2.i_item_sk = descending.item_sk)) otherCondition=() build RFs:RF0 item_sk->[i_item_sk] +------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) +--------------------------------------PhysicalProject +----------------------------------------hashAgg[GLOBAL] 
+------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashAgg[LOCAL] +----------------------------------------------PhysicalProject +------------------------------------------------filter((ss1.ss_store_sk = 4)) +--------------------------------------------------PhysicalOlapScan[store_sales] +--------------------------------------PhysicalProject +----------------------------------------PhysicalAssertNumRows +------------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------------PhysicalProject +----------------------------------------------hashAgg[GLOBAL] +------------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------------hashAgg[LOCAL] +----------------------------------------------------PhysicalProject +------------------------------------------------------filter((store_sales.ss_store_sk = 4) and ss_hdemo_sk IS NULL) +--------------------------------------------------------PhysicalOlapScan[store_sales] ----------------PhysicalProject -------------------PhysicalOlapScan[item] apply RFs: RF0 -----------------PhysicalProject -------------------filter((rnk < 11)) ---------------------PhysicalWindow -----------------------PhysicalQuickSort[MERGE_SORT] -------------------------PhysicalDistribute[DistributionSpecGather] ---------------------------PhysicalQuickSort[LOCAL_SORT] -----------------------------PhysicalPartitionTopN -------------------------------PhysicalProject ---------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) -----------------------------------PhysicalProject -------------------------------------hashAgg[GLOBAL] ---------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------hashAgg[LOCAL] 
-------------------------------------------PhysicalProject ---------------------------------------------filter((ss1.ss_store_sk = 4)) -----------------------------------------------PhysicalOlapScan[store_sales] +------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((i2.i_item_sk = descending.item_sk)) otherCondition=() build RFs:RF0 item_sk->[i_item_sk] +--------------------PhysicalProject +----------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(i2.i_product_name)] apply RFs: RF0 +--------------------PhysicalProject +----------------------filter((rnk < 11)) +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[MERGE_SORT] +----------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------PhysicalQuickSort[LOCAL_SORT] +--------------------------------PhysicalPartitionTopN ----------------------------------PhysicalProject -------------------------------------PhysicalAssertNumRows ---------------------------------------PhysicalDistribute[DistributionSpecGather] -----------------------------------------PhysicalProject -------------------------------------------hashAgg[GLOBAL] ---------------------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------------------hashAgg[LOCAL] -------------------------------------------------PhysicalProject ---------------------------------------------------filter((store_sales.ss_store_sk = 4) and ss_hdemo_sk IS NULL) -----------------------------------------------------PhysicalOlapScan[store_sales] +------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) +--------------------------------------PhysicalProject +----------------------------------------hashAgg[GLOBAL] +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashAgg[LOCAL] 
+----------------------------------------------PhysicalProject +------------------------------------------------filter((ss1.ss_store_sk = 4)) +--------------------------------------------------PhysicalOlapScan[store_sales] +--------------------------------------PhysicalProject +----------------------------------------PhysicalAssertNumRows +------------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------------PhysicalProject +----------------------------------------------hashAgg[GLOBAL] +------------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------------hashAgg[LOCAL] +----------------------------------------------------PhysicalProject +------------------------------------------------------filter((store_sales.ss_store_sk = 4) and ss_hdemo_sk IS NULL) +--------------------------------------------------------PhysicalOlapScan[store_sales] diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query65.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query65.out index 46370ff0faebb0..3840e10df485e8 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query65.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query65.out @@ -1,41 +1,43 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ds_shape_65 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN broadcast] hashCondition=((sb.ss_store_sk = sc.ss_store_sk)) otherCondition=((cast(revenue as DOUBLE) <= cast((0.1 * ave) as DOUBLE))) build RFs:RF4 ss_store_sk->[s_store_sk,ss_store_sk] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(store.s_store_name,item.i_item_desc,sc.revenue) lazySlots:(item.i_brand,item.i_current_price,item.i_wholesale_cost)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = sc.ss_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ss_item_sk] +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((sb.ss_store_sk = sc.ss_store_sk)) otherCondition=((cast(revenue as DOUBLE) <= cast((0.1 * ave) as DOUBLE))) build RFs:RF4 ss_store_sk->[s_store_sk,ss_store_sk] ----------------PhysicalProject -------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = sc.ss_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 RF4 -------------------------------PhysicalProject ---------------------------------filter((date_dim.d_month_seq <= 1187) and (date_dim.d_month_seq >= 
1176)) -----------------------------------PhysicalOlapScan[date_dim] +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = sc.ss_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ss_item_sk] --------------------PhysicalProject -----------------------PhysicalOlapScan[store] apply RFs: RF4 -----------------PhysicalProject -------------------PhysicalOlapScan[item] -------------hashAgg[GLOBAL] ---------------PhysicalDistribute[DistributionSpecHash] -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = sc.ss_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 RF4 +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_month_seq <= 1187) and (date_dim.d_month_seq >= 1176)) +--------------------------------------PhysicalOlapScan[date_dim] +------------------------PhysicalProject 
+--------------------------PhysicalOlapScan[store] apply RFs: RF4 +--------------------PhysicalProject +----------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(item.i_current_price,item.i_wholesale_cost,item.i_brand)] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] ------------------------------PhysicalProject ---------------------------------filter((date_dim.d_month_seq <= 1187) and (date_dim.d_month_seq >= 1176)) -----------------------------------PhysicalOlapScan[date_dim] +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_month_seq <= 1187) and (date_dim.d_month_seq >= 1176)) +--------------------------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query68.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query68.out index 2f4fbe401f1315..bcbc6dfb42e77e 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query68.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query68.out @@ -1,38 +1,40 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ds_shape_68 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_current_addr_sk = current_addr.ca_address_sk)) otherCondition=(( not (ca_city = bought_city))) build RFs:RF5 c_current_addr_sk->[ca_address_sk] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(customer.c_last_name,current_addr.ca_city,dn.bought_city,dn.ss_ticket_number,dn.extended_price,dn.extended_tax,dn.list_price) lazySlots:(customer.c_first_name)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------PhysicalOlapScan[customer_address] apply RFs: RF5 -------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 ss_customer_sk->[c_customer_sk] +--------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_current_addr_sk = current_addr.ca_address_sk)) otherCondition=(( not (ca_city = bought_city))) build RFs:RF5 c_current_addr_sk->[ca_address_sk] ----------------PhysicalProject -------------------PhysicalOlapScan[customer] apply RFs: RF4 +------------------PhysicalOlapScan[customer_address] apply RFs: RF5 ----------------PhysicalProject -------------------hashAgg[LOCAL] +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 ss_customer_sk->[c_customer_sk] --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ss_addr_sk->[ca_address_sk] -------------------------PhysicalProject 
---------------------------PhysicalOlapScan[customer_address] apply RFs: RF3 +----------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_first_name)] apply RFs: RF4 +--------------------PhysicalProject +----------------------hashAgg[LOCAL] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] +--------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ss_addr_sk->[ca_address_sk] ----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +------------------------------PhysicalOlapScan[customer_address] apply RFs: RF3 +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] --------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] ------------------------------------PhysicalProject ---------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 
d_date_sk->[ss_sold_date_sk] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +----------------------------------------PhysicalProject +------------------------------------------filter((date_dim.d_dom <= 2) and (date_dim.d_dom >= 1) and d_year IN (1998, 1999, 2000)) +--------------------------------------------PhysicalOlapScan[date_dim] ------------------------------------PhysicalProject ---------------------------------------filter((date_dim.d_dom <= 2) and (date_dim.d_dom >= 1) and d_year IN (1998, 1999, 2000)) -----------------------------------------PhysicalOlapScan[date_dim] +--------------------------------------filter(s_city IN ('Fairview', 'Midway')) +----------------------------------------PhysicalOlapScan[store] --------------------------------PhysicalProject -----------------------------------filter(s_city IN ('Fairview', 'Midway')) -------------------------------------PhysicalOlapScan[store] -----------------------------PhysicalProject -------------------------------filter(OR[(household_demographics.hd_dep_count = 3),(household_demographics.hd_vehicle_count = 4)]) ---------------------------------PhysicalOlapScan[household_demographics] +----------------------------------filter(OR[(household_demographics.hd_dep_count = 3),(household_demographics.hd_vehicle_count = 4)]) +------------------------------------PhysicalOlapScan[household_demographics] diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query81.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query81.out index f7126848fd47b5..7aa4574a81f9c2 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query81.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query81.out @@ -18,24 +18,26 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------PhysicalProject ------------------PhysicalOlapScan[customer_address] --PhysicalResultSink 
-----PhysicalTopN[MERGE_SORT] -------PhysicalDistribute[DistributionSpecGather] ---------PhysicalTopN[LOCAL_SORT] -----------PhysicalProject -------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) build RFs:RF4 ctr_state->[ctr_state] +----PhysicalProject +------PhysicalLazyMaterialize[materializedSlots:(customer.c_customer_id,customer_address.ca_street_number,customer_address.ca_street_name,customer_address.ca_street_type,customer_address.ca_suite_number,customer_address.ca_city,customer_address.ca_county,customer_address.ca_state,customer_address.ca_zip,customer_address.ca_country,customer_address.ca_gmt_offset,customer_address.ca_location_type,ctr1.ctr_total_return) lazySlots:(customer.c_first_name,customer.c_last_name,customer.c_salutation)] +--------PhysicalTopN[MERGE_SORT] +----------PhysicalDistribute[DistributionSpecGather] +------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))) build RFs:RF4 ctr_state->[ctr_state] ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[ctr_customer_sk] -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF2 RF4 +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 
ca_address_sk->[c_current_addr_sk] ----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] apply RFs: RF3 -------------------PhysicalProject ---------------------filter((customer_address.ca_state = 'TN')) -----------------------PhysicalOlapScan[customer_address] ---------------hashAgg[GLOBAL] -----------------PhysicalDistribute[DistributionSpecHash] -------------------hashAgg[LOCAL] ---------------------PhysicalDistribute[DistributionSpecExecutionAny] -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[ctr_customer_sk] +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF2 RF4 +--------------------------PhysicalProject +----------------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_last_name,customer.c_salutation,customer.c_first_name)] apply RFs: RF3 +----------------------PhysicalProject +------------------------filter((customer_address.ca_state = 'TN')) +--------------------------PhysicalOlapScan[customer_address] +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalDistribute[DistributionSpecExecutionAny] +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query44.out b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query44.out index 6549d1401b11a3..966a4f2f5d449f 100644 --- a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query44.out +++ b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query44.out @@ -1,69 +1,71 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ds_shape_44 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN broadcast] hashCondition=((i2.i_item_sk = descending.item_sk)) otherCondition=() +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(asceding.rnk) lazySlots:(best_performing,worst_performing)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((i1.i_item_sk = asceding.item_sk)) otherCondition=() build RFs:RF0 item_sk->[i_item_sk] +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((i2.i_item_sk = descending.item_sk)) otherCondition=() ----------------PhysicalProject -------------------PhysicalOlapScan[item] apply RFs: RF0 -----------------PhysicalProject -------------------hashJoin[INNER_JOIN broadcast] hashCondition=((asceding.rnk = descending.rnk)) otherCondition=() +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((i1.i_item_sk = asceding.item_sk)) otherCondition=() build RFs:RF0 item_sk->[i_item_sk] --------------------PhysicalProject -----------------------filter((rnk < 11)) -------------------------PhysicalWindow ---------------------------PhysicalQuickSort[MERGE_SORT] -----------------------------PhysicalDistribute[DistributionSpecGather] -------------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------------PhysicalPartitionTopN -----------------------------------PhysicalProject -------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) ---------------------------------------PhysicalProject -----------------------------------------hashAgg[GLOBAL] 
-------------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------------hashAgg[LOCAL] -----------------------------------------------PhysicalProject -------------------------------------------------filter((ss1.ss_store_sk = 366)) ---------------------------------------------------PhysicalOlapScan[store_sales] ---------------------------------------PhysicalProject -----------------------------------------PhysicalAssertNumRows -------------------------------------------PhysicalDistribute[DistributionSpecGather] ---------------------------------------------PhysicalProject -----------------------------------------------hashAgg[GLOBAL] -------------------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------------------hashAgg[LOCAL] -----------------------------------------------------PhysicalProject -------------------------------------------------------filter((store_sales.ss_store_sk = 366) and ss_cdemo_sk IS NULL) ---------------------------------------------------------PhysicalOlapScan[store_sales] +----------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(i1.i_product_name)] apply RFs: RF0 --------------------PhysicalProject -----------------------filter((rnk < 11)) -------------------------PhysicalWindow ---------------------------PhysicalQuickSort[MERGE_SORT] -----------------------------PhysicalDistribute[DistributionSpecGather] -------------------------------PhysicalQuickSort[LOCAL_SORT] ---------------------------------PhysicalPartitionTopN -----------------------------------PhysicalProject -------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((asceding.rnk = descending.rnk)) otherCondition=() +------------------------PhysicalProject +--------------------------filter((rnk < 11)) 
+----------------------------PhysicalWindow +------------------------------PhysicalQuickSort[MERGE_SORT] +--------------------------------PhysicalDistribute[DistributionSpecGather] +----------------------------------PhysicalQuickSort[LOCAL_SORT] +------------------------------------PhysicalPartitionTopN --------------------------------------PhysicalProject -----------------------------------------hashAgg[GLOBAL] -------------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------------hashAgg[LOCAL] -----------------------------------------------PhysicalProject -------------------------------------------------filter((ss1.ss_store_sk = 366)) ---------------------------------------------------PhysicalOlapScan[store_sales] +----------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) +------------------------------------------PhysicalProject +--------------------------------------------hashAgg[GLOBAL] +----------------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------------hashAgg[LOCAL] +--------------------------------------------------PhysicalProject +----------------------------------------------------filter((ss1.ss_store_sk = 366)) +------------------------------------------------------PhysicalOlapScan[store_sales] +------------------------------------------PhysicalProject +--------------------------------------------PhysicalAssertNumRows +----------------------------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------------------------PhysicalProject +--------------------------------------------------hashAgg[GLOBAL] +----------------------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------------------hashAgg[LOCAL] 
+--------------------------------------------------------PhysicalProject +----------------------------------------------------------filter((store_sales.ss_store_sk = 366) and ss_cdemo_sk IS NULL) +------------------------------------------------------------PhysicalOlapScan[store_sales] +------------------------PhysicalProject +--------------------------filter((rnk < 11)) +----------------------------PhysicalWindow +------------------------------PhysicalQuickSort[MERGE_SORT] +--------------------------------PhysicalDistribute[DistributionSpecGather] +----------------------------------PhysicalQuickSort[LOCAL_SORT] +------------------------------------PhysicalPartitionTopN --------------------------------------PhysicalProject -----------------------------------------PhysicalAssertNumRows -------------------------------------------PhysicalDistribute[DistributionSpecGather] ---------------------------------------------PhysicalProject -----------------------------------------------hashAgg[GLOBAL] -------------------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------------------hashAgg[LOCAL] -----------------------------------------------------PhysicalProject -------------------------------------------------------filter((store_sales.ss_store_sk = 366) and ss_cdemo_sk IS NULL) ---------------------------------------------------------PhysicalOlapScan[store_sales] -------------PhysicalProject ---------------PhysicalOlapScan[item] +----------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) +------------------------------------------PhysicalProject +--------------------------------------------hashAgg[GLOBAL] +----------------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------------hashAgg[LOCAL] +--------------------------------------------------PhysicalProject 
+----------------------------------------------------filter((ss1.ss_store_sk = 366)) +------------------------------------------------------PhysicalOlapScan[store_sales] +------------------------------------------PhysicalProject +--------------------------------------------PhysicalAssertNumRows +----------------------------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------------------------PhysicalProject +--------------------------------------------------hashAgg[GLOBAL] +----------------------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------------------hashAgg[LOCAL] +--------------------------------------------------------PhysicalProject +----------------------------------------------------------filter((store_sales.ss_store_sk = 366) and ss_cdemo_sk IS NULL) +------------------------------------------------------------PhysicalOlapScan[store_sales] +----------------PhysicalProject +------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(i2.i_product_name)] diff --git a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query65.out b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query65.out index 574a272c000358..9cf67b6f83a740 100644 --- a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query65.out +++ b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query65.out @@ -1,41 +1,43 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ds_shape_65 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((sb.ss_store_sk = sc.ss_store_sk)) otherCondition=((cast(revenue as DOUBLE) <= cast((0.1 * ave) as DOUBLE))) build RFs:RF4 ss_store_sk->[s_store_sk,ss_store_sk] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(store.s_store_name,item.i_item_desc,sc.revenue) lazySlots:(item.i_brand,item.i_current_price,item.i_wholesale_cost)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = sc.ss_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ss_item_sk] +--------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((sb.ss_store_sk = sc.ss_store_sk)) otherCondition=((cast(revenue as DOUBLE) <= cast((0.1 * ave) as DOUBLE))) build RFs:RF4 ss_store_sk->[s_store_sk,ss_store_sk] ----------------PhysicalProject -------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = sc.ss_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 RF4 -------------------------------PhysicalProject ---------------------------------filter((date_dim.d_month_seq <= 1197) and 
(date_dim.d_month_seq >= 1186)) -----------------------------------PhysicalOlapScan[date_dim] +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = sc.ss_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ss_item_sk] --------------------PhysicalProject -----------------------PhysicalOlapScan[store] apply RFs: RF4 -----------------PhysicalProject -------------------PhysicalOlapScan[item] -------------hashAgg[GLOBAL] ---------------PhysicalDistribute[DistributionSpecHash] -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = sc.ss_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 RF4 +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_month_seq <= 1197) and (date_dim.d_month_seq >= 1186)) +--------------------------------------PhysicalOlapScan[date_dim] +------------------------PhysicalProject 
+--------------------------PhysicalOlapScan[store] apply RFs: RF4 +--------------------PhysicalProject +----------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(item.i_current_price,item.i_wholesale_cost,item.i_brand)] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] ------------------------------PhysicalProject ---------------------------------filter((date_dim.d_month_seq <= 1197) and (date_dim.d_month_seq >= 1186)) -----------------------------------PhysicalOlapScan[date_dim] +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_month_seq <= 1197) and (date_dim.d_month_seq >= 1186)) +--------------------------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query68.out b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query68.out index 2339b9d070f674..8a3ad93d8b2cd1 100644 --- a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query68.out +++ b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query68.out @@ -1,38 +1,40 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !ds_shape_68 -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------hashJoin[INNER_JOIN shuffle] hashCondition=((customer.c_current_addr_sk = current_addr.ca_address_sk)) otherCondition=(( not (ca_city = bought_city))) build RFs:RF5 ca_address_sk->[c_current_addr_sk] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(customer.c_last_name,current_addr.ca_city,dn.bought_city,dn.ss_ticket_number,dn.extended_price,dn.extended_tax,dn.list_price) lazySlots:(customer.c_first_name)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] ------------PhysicalProject ---------------hashJoin[INNER_JOIN shuffle] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 c_customer_sk->[ss_customer_sk] +--------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer.c_current_addr_sk = current_addr.ca_address_sk)) otherCondition=(( not (ca_city = bought_city))) build RFs:RF5 ca_address_sk->[c_current_addr_sk] ----------------PhysicalProject -------------------hashAgg[LOCAL] +------------------hashJoin[INNER_JOIN shuffle] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 c_customer_sk->[ss_customer_sk] --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[ss_addr_sk] +----------------------hashAgg[LOCAL] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] +--------------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[ss_addr_sk] ----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] --------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] ------------------------------------PhysicalProject ---------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3 RF4 +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3 RF4 +----------------------------------------PhysicalProject +------------------------------------------filter((date_dim.d_dom <= 2) and (date_dim.d_dom >= 1) and d_year IN (1999, 2000, 2001)) +--------------------------------------------PhysicalOlapScan[date_dim] ------------------------------------PhysicalProject ---------------------------------------filter((date_dim.d_dom <= 2) and (date_dim.d_dom >= 1) and d_year IN (1999, 2000, 2001)) 
-----------------------------------------PhysicalOlapScan[date_dim] +--------------------------------------filter(s_city IN ('Bethel', 'Pleasant Hill')) +----------------------------------------PhysicalOlapScan[store] --------------------------------PhysicalProject -----------------------------------filter(s_city IN ('Bethel', 'Pleasant Hill')) -------------------------------------PhysicalOlapScan[store] +----------------------------------filter(OR[(household_demographics.hd_dep_count = 4),(household_demographics.hd_vehicle_count = 0)]) +------------------------------------PhysicalOlapScan[household_demographics] ----------------------------PhysicalProject -------------------------------filter(OR[(household_demographics.hd_dep_count = 4),(household_demographics.hd_vehicle_count = 0)]) ---------------------------------PhysicalOlapScan[household_demographics] -------------------------PhysicalProject ---------------------------PhysicalOlapScan[customer_address] +------------------------------PhysicalOlapScan[customer_address] +--------------------PhysicalProject +----------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_first_name)] apply RFs: RF5 ----------------PhysicalProject -------------------PhysicalOlapScan[customer] apply RFs: RF5 -------------PhysicalProject ---------------PhysicalOlapScan[customer_address] +------------------PhysicalOlapScan[customer_address] diff --git a/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q2.out b/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q2.out index c1a68c315e06b2..2bbd70beab96a1 100644 --- a/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q2.out +++ b/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q2.out @@ -1,30 +1,32 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !select -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------filter((partsupp.ps_supplycost = min(ps_supplycost) OVER(PARTITION BY p_partkey))) -------------PhysicalWindow ---------------PhysicalQuickSort[LOCAL_SORT] -----------------PhysicalProject -------------------hashJoin[INNER_JOIN broadcast] hashCondition=((nation.n_regionkey = region.r_regionkey)) otherCondition=() build RFs:RF3 r_regionkey->[n_regionkey] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(supplier.s_acctbal,supplier.s_name,nation.n_name,part.p_partkey) lazySlots:(part.p_mfgr,supplier.s_address,supplier.s_comment,supplier.s_phone)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] +------------PhysicalProject +--------------filter((partsupp.ps_supplycost = min(ps_supplycost) OVER(PARTITION BY p_partkey))) +----------------PhysicalWindow +------------------PhysicalQuickSort[LOCAL_SORT] --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF2 n_nationkey->[s_nationkey] +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((nation.n_regionkey = region.r_regionkey)) otherCondition=() build RFs:RF3 r_regionkey->[n_regionkey] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((supplier.s_suppkey = partsupp.ps_suppkey)) otherCondition=() build RFs:RF1 s_suppkey->[ps_suppkey] +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF2 n_nationkey->[s_nationkey] ----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN 
colocated] hashCondition=((part.p_partkey = partsupp.ps_partkey)) otherCondition=() build RFs:RF0 p_partkey->[ps_partkey] +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((supplier.s_suppkey = partsupp.ps_suppkey)) otherCondition=() --------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[partsupp] apply RFs: RF0 RF1 ---------------------------------PhysicalProject -----------------------------------filter((p_type like '%BRASS') and (part.p_size = 15)) -------------------------------------PhysicalOlapScan[part] -----------------------------PhysicalOlapScan[supplier] apply RFs: RF2 +----------------------------------hashJoin[INNER_JOIN colocated] hashCondition=((part.p_partkey = partsupp.ps_partkey)) otherCondition=() build RFs:RF0 p_partkey->[ps_partkey] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[partsupp] apply RFs: RF0 +------------------------------------PhysicalProject +--------------------------------------filter((p_type like '%BRASS') and (part.p_size = 15)) +----------------------------------------PhysicalLazyMaterializeOlapScan[part lazySlots:(part.p_mfgr)] +--------------------------------PhysicalLazyMaterializeOlapScan[supplier lazySlots:(supplier.s_address,supplier.s_phone,supplier.s_comment)] apply RFs: RF2 +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[nation] apply RFs: RF3 ------------------------PhysicalProject ---------------------------PhysicalOlapScan[nation] apply RFs: RF3 ---------------------PhysicalProject -----------------------filter((region.r_name = 'EUROPE')) -------------------------PhysicalOlapScan[region] +--------------------------filter((region.r_name = 'EUROPE')) +----------------------------PhysicalOlapScan[region] diff --git a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q2.out b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q2.out 
index 6fdccbcadb8073..2a4aed95629cb3 100644 --- a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q2.out +++ b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q2.out @@ -1,31 +1,33 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !select -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------filter((partsupp.ps_supplycost = min(ps_supplycost) OVER(PARTITION BY p_partkey))) -------------PhysicalWindow ---------------PhysicalQuickSort[LOCAL_SORT] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((supplier.s_suppkey = partsupp.ps_suppkey)) otherCondition=() build RFs:RF3 ps_suppkey->[s_suppkey] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(supplier.s_acctbal,supplier.s_name,nation.n_name,part.p_partkey) lazySlots:(part.p_mfgr,supplier.s_address,supplier.s_comment,supplier.s_phone)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] +------------PhysicalProject +--------------filter((partsupp.ps_supplycost = min(ps_supplycost) OVER(PARTITION BY p_partkey))) +----------------PhysicalWindow +------------------PhysicalQuickSort[LOCAL_SORT] +--------------------PhysicalDistribute[DistributionSpecHash] ----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF2 n_nationkey->[s_nationkey] ---------------------------PhysicalOlapScan[supplier] apply RFs: RF2 RF3 +------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((supplier.s_suppkey = partsupp.ps_suppkey)) otherCondition=() build RFs:RF3 ps_suppkey->[s_suppkey] --------------------------PhysicalProject 
-----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((nation.n_regionkey = region.r_regionkey)) otherCondition=() build RFs:RF1 r_regionkey->[n_regionkey] +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF2 n_nationkey->[s_nationkey] +------------------------------PhysicalLazyMaterializeOlapScan[supplier lazySlots:(supplier.s_address,supplier.s_phone,supplier.s_comment)] apply RFs: RF2 RF3 ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[nation] apply RFs: RF1 -------------------------------PhysicalProject ---------------------------------filter((region.r_name = 'EUROPE')) -----------------------------------PhysicalOlapScan[region] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN colocated] hashCondition=((part.p_partkey = partsupp.ps_partkey)) otherCondition=() build RFs:RF0 p_partkey->[ps_partkey] ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[partsupp] apply RFs: RF0 +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((nation.n_regionkey = region.r_regionkey)) otherCondition=() build RFs:RF1 r_regionkey->[n_regionkey] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[nation] apply RFs: RF1 +----------------------------------PhysicalProject +------------------------------------filter((region.r_name = 'EUROPE')) +--------------------------------------PhysicalOlapScan[region] --------------------------PhysicalProject -----------------------------filter((p_type like '%BRASS') and (part.p_size = 15)) -------------------------------PhysicalOlapScan[part] +----------------------------hashJoin[INNER_JOIN colocated] hashCondition=((part.p_partkey = partsupp.ps_partkey)) otherCondition=() build RFs:RF0 p_partkey->[ps_partkey] 
+------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[partsupp] apply RFs: RF0 +------------------------------PhysicalProject +--------------------------------filter((p_type like '%BRASS') and (part.p_size = 15)) +----------------------------------PhysicalLazyMaterializeOlapScan[part lazySlots:(part.p_mfgr)] diff --git a/regression-test/data/shape_check/tpch_sf1000/shape/q2.out b/regression-test/data/shape_check/tpch_sf1000/shape/q2.out index 6fdccbcadb8073..2a4aed95629cb3 100644 --- a/regression-test/data/shape_check/tpch_sf1000/shape/q2.out +++ b/regression-test/data/shape_check/tpch_sf1000/shape/q2.out @@ -1,31 +1,33 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !select -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------filter((partsupp.ps_supplycost = min(ps_supplycost) OVER(PARTITION BY p_partkey))) -------------PhysicalWindow ---------------PhysicalQuickSort[LOCAL_SORT] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((supplier.s_suppkey = partsupp.ps_suppkey)) otherCondition=() build RFs:RF3 ps_suppkey->[s_suppkey] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(supplier.s_acctbal,supplier.s_name,nation.n_name,part.p_partkey) lazySlots:(part.p_mfgr,supplier.s_address,supplier.s_comment,supplier.s_phone)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] +------------PhysicalProject +--------------filter((partsupp.ps_supplycost = min(ps_supplycost) OVER(PARTITION BY p_partkey))) +----------------PhysicalWindow +------------------PhysicalQuickSort[LOCAL_SORT] +--------------------PhysicalDistribute[DistributionSpecHash] 
----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF2 n_nationkey->[s_nationkey] ---------------------------PhysicalOlapScan[supplier] apply RFs: RF2 RF3 +------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((supplier.s_suppkey = partsupp.ps_suppkey)) otherCondition=() build RFs:RF3 ps_suppkey->[s_suppkey] --------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((nation.n_regionkey = region.r_regionkey)) otherCondition=() build RFs:RF1 r_regionkey->[n_regionkey] +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF2 n_nationkey->[s_nationkey] +------------------------------PhysicalLazyMaterializeOlapScan[supplier lazySlots:(supplier.s_address,supplier.s_phone,supplier.s_comment)] apply RFs: RF2 RF3 ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[nation] apply RFs: RF1 -------------------------------PhysicalProject ---------------------------------filter((region.r_name = 'EUROPE')) -----------------------------------PhysicalOlapScan[region] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN colocated] hashCondition=((part.p_partkey = partsupp.ps_partkey)) otherCondition=() build RFs:RF0 p_partkey->[ps_partkey] ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[partsupp] apply RFs: RF0 +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((nation.n_regionkey = region.r_regionkey)) otherCondition=() build RFs:RF1 r_regionkey->[n_regionkey] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[nation] apply RFs: RF1 +----------------------------------PhysicalProject 
+------------------------------------filter((region.r_name = 'EUROPE')) +--------------------------------------PhysicalOlapScan[region] --------------------------PhysicalProject -----------------------------filter((p_type like '%BRASS') and (part.p_size = 15)) -------------------------------PhysicalOlapScan[part] +----------------------------hashJoin[INNER_JOIN colocated] hashCondition=((part.p_partkey = partsupp.ps_partkey)) otherCondition=() build RFs:RF0 p_partkey->[ps_partkey] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[partsupp] apply RFs: RF0 +------------------------------PhysicalProject +--------------------------------filter((p_type like '%BRASS') and (part.p_size = 15)) +----------------------------------PhysicalLazyMaterializeOlapScan[part lazySlots:(part.p_mfgr)] diff --git a/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q2.out b/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q2.out index c1a68c315e06b2..f65137169a261c 100644 --- a/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q2.out +++ b/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q2.out @@ -1,30 +1,32 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !select -- PhysicalResultSink ---PhysicalTopN[MERGE_SORT] -----PhysicalDistribute[DistributionSpecGather] -------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------filter((partsupp.ps_supplycost = min(ps_supplycost) OVER(PARTITION BY p_partkey))) -------------PhysicalWindow ---------------PhysicalQuickSort[LOCAL_SORT] -----------------PhysicalProject -------------------hashJoin[INNER_JOIN broadcast] hashCondition=((nation.n_regionkey = region.r_regionkey)) otherCondition=() build RFs:RF3 r_regionkey->[n_regionkey] +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(supplier.s_acctbal,supplier.s_name,nation.n_name,part.p_partkey) lazySlots:(part.p_mfgr,supplier.s_address,supplier.s_comment,supplier.s_phone)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] +------------PhysicalProject +--------------filter((partsupp.ps_supplycost = min(ps_supplycost) OVER(PARTITION BY p_partkey))) +----------------PhysicalWindow +------------------PhysicalQuickSort[LOCAL_SORT] --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF2 n_nationkey->[s_nationkey] +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((nation.n_regionkey = region.r_regionkey)) otherCondition=() build RFs:RF3 r_regionkey->[n_regionkey] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((supplier.s_suppkey = partsupp.ps_suppkey)) otherCondition=() build RFs:RF1 s_suppkey->[ps_suppkey] +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF2 n_nationkey->[s_nationkey] ----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN 
colocated] hashCondition=((part.p_partkey = partsupp.ps_partkey)) otherCondition=() build RFs:RF0 p_partkey->[ps_partkey] +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((supplier.s_suppkey = partsupp.ps_suppkey)) otherCondition=() build RFs:RF1 s_suppkey->[ps_suppkey] --------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[partsupp] apply RFs: RF0 RF1 ---------------------------------PhysicalProject -----------------------------------filter((p_type like '%BRASS') and (part.p_size = 15)) -------------------------------------PhysicalOlapScan[part] -----------------------------PhysicalOlapScan[supplier] apply RFs: RF2 +----------------------------------hashJoin[INNER_JOIN colocated] hashCondition=((part.p_partkey = partsupp.ps_partkey)) otherCondition=() build RFs:RF0 p_partkey->[ps_partkey] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[partsupp] apply RFs: RF0 RF1 +------------------------------------PhysicalProject +--------------------------------------filter((p_type like '%BRASS') and (part.p_size = 15)) +----------------------------------------PhysicalLazyMaterializeOlapScan[part lazySlots:(part.p_mfgr)] +--------------------------------PhysicalLazyMaterializeOlapScan[supplier lazySlots:(supplier.s_address,supplier.s_phone,supplier.s_comment)] apply RFs: RF2 +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[nation] apply RFs: RF3 ------------------------PhysicalProject ---------------------------PhysicalOlapScan[nation] apply RFs: RF3 ---------------------PhysicalProject -----------------------filter((region.r_name = 'EUROPE')) -------------------------PhysicalOlapScan[region] +--------------------------filter((region.r_name = 'EUROPE')) +----------------------------PhysicalOlapScan[region] diff --git a/regression-test/suites/external_table_p0/hive/test_external_sql_block_rule.groovy 
b/regression-test/suites/external_table_p0/hive/test_external_sql_block_rule.groovy index 836a98ea36a7c2..0f118483224f88 100644 --- a/regression-test/suites/external_table_p0/hive/test_external_sql_block_rule.groovy +++ b/regression-test/suites/external_table_p0/hive/test_external_sql_block_rule.groovy @@ -22,20 +22,18 @@ suite("test_external_sql_block_rule", "external_docker,hive,external_docker_hive return; } - String hivePrefix = "hive2"; - String catalog_name = "test_${hivePrefix}_external_sql_block_rule"; String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") - String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String hms_port = context.config.otherConfigs.get("hive2HmsPort") - sql """drop catalog if exists ${catalog_name} """ + sql """drop catalog if exists test_hive2_external_sql_block_rule """ - sql """CREATE CATALOG ${catalog_name} PROPERTIES ( + sql """CREATE CATALOG test_hive2_external_sql_block_rule PROPERTIES ( 'type'='hms', 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}', 'hadoop.username' = 'hive' );""" - sql "use ${catalog_name}.`default`"; + sql "use test_hive2_external_sql_block_rule.`default`"; qt_sql01 """select * from parquet_partition_table order by l_linenumber,l_orderkey limit 10;""" sql """drop sql_block_rule if exists external_hive_partition""" @@ -84,21 +82,21 @@ suite("test_external_sql_block_rule", "external_docker,hive,external_docker_hive // login as external_block_user1 def result1 = connect('external_block_user1', '', context.config.jdbcUrl) { test { - sql """select * from ${catalog_name}.`default`.parquet_partition_table order by l_linenumber limit 10;""" + sql """select * from test_hive2_external_sql_block_rule.`default`.parquet_partition_table order by l_linenumber limit 10;""" exception """sql hits sql block rule: external_hive_partition, reach partition_num : 3""" } } // login as external_block_user2 def result2 = connect('external_block_user2', '', 
context.config.jdbcUrl) { test { - sql """select * from ${catalog_name}.`default`.parquet_partition_table order by l_linenumber limit 10;""" + sql """select * from test_hive2_external_sql_block_rule.`default`.parquet_partition_table order by l_linenumber limit 10;""" exception """sql hits sql block rule: external_hive_partition2, reach tablet_num : 3""" } } // login as external_block_user3 def result3 = connect('external_block_user3', '', context.config.jdbcUrl) { test { - sql """select * from ${catalog_name}.`default`.parquet_partition_table order by l_linenumber limit 10;""" + sql """select * from test_hive2_external_sql_block_rule.`default`.parquet_partition_table order by l_linenumber limit 10;""" exception """sql hits sql block rule: external_hive_partition3, reach cardinality : 3""" } } diff --git a/regression-test/suites/external_table_p0/hive/test_hive_rename_column_orc_parquet.groovy b/regression-test/suites/external_table_p0/hive/test_hive_rename_column_orc_parquet.groovy index 88d8a586e6847e..5e270cbf792331 100644 --- a/regression-test/suites/external_table_p0/hive/test_hive_rename_column_orc_parquet.groovy +++ b/regression-test/suites/external_table_p0/hive/test_hive_rename_column_orc_parquet.groovy @@ -137,10 +137,71 @@ suite("test_hive_rename_column_orc_parquet", "p0,external,hive,external_docker,e qt_rename_parquet_12_false """ select f,new_d,c,new_b,new_a from test_hive_rename_column_parquet order by new_b,c; """ qt_rename_parquet_13_false """ select * from test_hive_rename_column_parquet where new_b + new_a != 31 order by new_b,c; """ - + sql """ set hive_orc_use_column_names=true; """ + qt_rename_orc_1_true_limit """ select * from test_hive_rename_column_orc order by new_b,c limit 3 """; + qt_rename_orc_2_true_limit """ select new_b from test_hive_rename_column_orc order by new_b,c limit 3 """; + qt_rename_orc_3_true_limit """ select new_b,count(*) from test_hive_rename_column_orc group by new_b order by new_b limit 3"""; + 
qt_rename_orc_4_true_limit """ select * from test_hive_rename_column_orc where new_a = 1 order by new_b,c limit 3"""; + qt_rename_orc_5_true_limit """ select * from test_hive_rename_column_orc where new_d is not null order by new_b,c limit 3 """ + qt_rename_orc_6_true_limit """ select * from test_hive_rename_column_orc where new_d is null order by new_b,c limit 3; """ + qt_rename_orc_7_true_limit """ select * from test_hive_rename_column_orc where new_b + new_a = 31 order by new_b,c limit 3; """ + qt_rename_orc_8_true_limit """ select new_a from test_hive_rename_column_orc where new_a = 1 order by new_b,c limit 3; """ + qt_rename_orc_9_true_limit """ select new_b from test_hive_rename_column_orc where new_b = 1 order by new_b limit 3; """ + qt_rename_orc_10_true_limit """ select new_b,new_d from test_hive_rename_column_orc where new_d +30*new_b=100 order by new_b,c limit 3; """ + qt_rename_orc_11_true_limit """ select new_b,new_a from test_hive_rename_column_orc order by new_b,c,new_a limit 3; """ + qt_rename_orc_12_true_limit """ select f,new_d,c,new_b,new_a from test_hive_rename_column_orc order by new_b,c limit 3; """ + qt_rename_orc_13_true_limit """ select * from test_hive_rename_column_orc where new_b + new_a != 31 order by new_b,c limit 3; """ + + + sql """ set hive_orc_use_column_names=false; """ + qt_rename_orc_1_false_limit """ select * from test_hive_rename_column_orc order by new_b,c limit 3 """; + qt_rename_orc_2_false_limit """ select new_b from test_hive_rename_column_orc order by new_b,c limit 3"""; + qt_rename_orc_3_false_limit """ select new_b,count(*) from test_hive_rename_column_orc group by new_b order by new_b limit 3"""; + qt_rename_orc_4_false_limit """ select * from test_hive_rename_column_orc where new_a = 1 order by new_b,c limit 3 """; + qt_rename_orc_5_false_limit """ select * from test_hive_rename_column_orc where new_d is not null order by new_b limit 3""" + qt_rename_orc_6_false_limit """ select * from test_hive_rename_column_orc 
where new_d is null order by new_b,c limit 3; """ + qt_rename_orc_7_false_limit """ select * from test_hive_rename_column_orc where new_b + new_a = 31 order by new_b,c limit 3; """ + qt_rename_orc_8_false_limit """ select new_a from test_hive_rename_column_orc where new_a = 1 order by new_b,c limit 3; """ + qt_rename_orc_9_false_limit """ select new_b from test_hive_rename_column_orc where new_b = 1 order by new_b limit 3; """ + qt_rename_orc_10_false_limit """ select new_b,new_d from test_hive_rename_column_orc where new_d +30*new_b=100 order by new_b,c limit 3; """ + qt_rename_orc_11_false_limit """ select new_b,new_a from test_hive_rename_column_orc order by new_b,c,new_a limit 3; """ + qt_rename_orc_12_false_limit """ select f,new_d,c,new_b,new_a from test_hive_rename_column_orc order by new_b,c limit 3; """ + qt_rename_orc_13_false_limit """ select * from test_hive_rename_column_orc where new_b + new_a != 31 order by new_b,c limit 3; """ + + sql """ set hive_parquet_use_column_names=true; """ + qt_rename_parquet_1_true_limit """ select * from test_hive_rename_column_parquet order by new_b,c limit 3 """; + qt_rename_parquet_2_true_limit """ select new_b from test_hive_rename_column_parquet order by new_b,c limit 3 """; + qt_rename_parquet_3_true_limit """ select new_b,count(*) from test_hive_rename_column_parquet group by new_b order by new_b limit 3"""; + qt_rename_parquet_4_true_limit """ select * from test_hive_rename_column_parquet where new_a = 1 order by new_b,c limit 3"""; + qt_rename_parquet_5_true_limit """ select * from test_hive_rename_column_parquet where new_d is not null order by new_b,c limit 3 """ + qt_rename_parquet_6_true_limit """ select * from test_hive_rename_column_parquet where new_d is null order by new_b,c limit 3; """ + qt_rename_parquet_7_true_limit """ select * from test_hive_rename_column_parquet where new_b + new_a = 31 order by new_b,c limit 3; """ + qt_rename_parquet_8_true_limit """ select new_a from 
test_hive_rename_column_parquet where new_a = 1 order by new_b,c limit 3; """ + qt_rename_parquet_9_true_limit """ select new_b from test_hive_rename_column_parquet where new_b = 1 order by new_b limit 3; """ + qt_rename_parquet_10_true_limit """ select new_b,new_d from test_hive_rename_column_parquet where new_d +30*new_b=100 order by new_b,c limit 3; """ + qt_rename_parquet_11_true_limit """ select new_b,new_a from test_hive_rename_column_parquet order by new_b,c,new_a limit 3; """ + qt_rename_parquet_12_true_limit """ select f,new_d,c,new_b,new_a from test_hive_rename_column_parquet order by new_b,c limit 3; """ + qt_rename_parquet_13_true_limit """ select * from test_hive_rename_column_parquet where new_b + new_a != 31 order by new_b,c limit 3; """ + + + sql """ set hive_parquet_use_column_names=false; """ + qt_rename_parquet_1_false_limit """ select * from test_hive_rename_column_parquet order by new_b,c limit 3"""; + qt_rename_parquet_2_false_limit """ select new_b from test_hive_rename_column_parquet order by new_b,c limit 3 """; + qt_rename_parquet_3_false_limit """ select new_b,count(*) from test_hive_rename_column_parquet group by new_b order by new_b limit 3"""; + qt_rename_parquet_4_false_limit """ select * from test_hive_rename_column_parquet where new_a = 1 order by new_b,c limit 3"""; + qt_rename_parquet_5_false_limit """ select * from test_hive_rename_column_parquet where new_d is not null order by new_b,c limit 3 """ + qt_rename_parquet_6_false_limit """ select * from test_hive_rename_column_parquet where new_d is null order by new_b,c limit 3; """ + qt_rename_parquet_7_false_limit """ select * from test_hive_rename_column_parquet where new_b + new_a = 31 order by new_b,c limit 3; """ + qt_rename_parquet_8_false_limit """ select new_a from test_hive_rename_column_parquet where new_a = 1 order by new_b,c limit 3; """ + qt_rename_parquet_9_false_limit """ select new_b from test_hive_rename_column_parquet where new_b = 1 order by new_b limit 3; """ + 
qt_rename_parquet_10_false_limit """ select new_b,new_d from test_hive_rename_column_parquet where new_d +30*new_b=100 order by new_b,c limit 3; """ + qt_rename_parquet_11_false_limit """ select new_b,new_a from test_hive_rename_column_parquet order by new_b,c,new_a limit 3; """ + qt_rename_parquet_12_false_limit """ select f,new_d,c,new_b,new_a from test_hive_rename_column_parquet order by new_b,c limit 3 ; """ + qt_rename_parquet_13_false_limit """ select * from test_hive_rename_column_parquet where new_b + new_a != 31 order by new_b,c limit 3; """ + } } /* diff --git a/regression-test/suites/external_table_p0/hive/test_hive_topn_lazy_mat.groovy b/regression-test/suites/external_table_p0/hive/test_hive_topn_lazy_mat.groovy new file mode 100644 index 00000000000000..de8677ff32935e --- /dev/null +++ b/regression-test/suites/external_table_p0/hive/test_hive_topn_lazy_mat.groovy @@ -0,0 +1,209 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_hive_topn_lazy_mat", "p0,external,hive,external_docker,external_docker_hive") { + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("diable Hive test.") + return; + } + + + // Define test function inside suite + def runTopNLazyMatTests = { + def limitValues = [3, 8, 29] + + // Single table query tests, using nested loops to iterate through tables and limit values + for (String table : ["orc_topn_lazy_mat_table", "parquet_topn_lazy_mat_table"]) { + qt_1 """ select * from ${table} order by id limit 10; """ + qt_2 """ select * from ${table} order by id,file_id limit 10; """ + qt_3 """ select score, value, active,name from ${table} order by id,file_id limit 10; """ + qt_4 """ select value,name,id,file_id from ${table} order by name limit 10; """ + + + for (int limit : limitValues) { + // Basic query + qt_test_basic """ + select * from ${table} + where value > 0 + order by id, score desc + limit ${limit}; + """ + + // Partial columns query + qt_test_partial """ + select id, name, score + from ${table} + where active = true + order by id, value desc + limit ${limit}; + """ + + // Multi-field sorting + qt_test_multi_sort """ + select id, name, value, score + from ${table} + order by id, score desc, value asc + limit ${limit}; + """ + + // Filter condition query + qt_test_filter """ + select id, name, value + from ${table} + where active = true and value > 100 + order by id, value desc + limit ${limit}; + """ + + // Subquery + qt_test_subquery """ + select t.id, t.name, t.total_score + from ( + select id, name, (value + score) as total_score + from ${table} + where active = true + ) t + order by t.id, t.total_score desc + limit ${limit}; + """ + + // Aggregation query + qt_test_agg """ + select + id, + max(score) as max_score, + avg(value) as avg_value + from ${table} + group by id + order by id, max_score desc + limit ${limit}; + """ + } + } + + // Multi-table query 
tests (join related), also using limit loop + for (int limit : limitValues) { + // Join query + qt_test_join1 """ + select o.id, o.name, o.value, p.score + from orc_topn_lazy_mat_table o + join parquet_topn_lazy_mat_table p + on o.id = p.id + where o.active = true + order by o.id, o.value desc + limit ${limit}; + """ + + // Left join + qt_test_join2 """ + select o.id, o.name, o.value, p.score + from orc_topn_lazy_mat_table o + left join parquet_topn_lazy_mat_table p + on o.id = p.id and o.file_id = p.file_id + where o.score > 0 + order by o.id, o.score desc, p.score desc + limit ${limit}; + """ + + // Complex join query + qt_test_complex """ + select + o.id, + o.name, + o.value, + o.score, + p.value as p_value + from orc_topn_lazy_mat_table o + left join parquet_topn_lazy_mat_table p + on o.id = p.id + where o.active = true + and o.value > 0 + and o.score is not null + order by o.id, o.score desc, p.value asc + limit ${limit}; + """ + } + } + + + + for (String hivePrefix : ["hive2"]) { + String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String catalog = "test_hive_topn_lazy_mat_${hivePrefix}" + + sql """drop catalog if exists ${catalog}""" + sql """create catalog if not exists ${catalog} properties ( + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}' + );""" + + logger.info("catalog " + catalog + " created") + sql """switch ${catalog};""" + + sql """ use global_lazy_mat_db; """ + + + sql """ + set enable_topn_lazy_materialization=true; + set runtime_filter_mode=GLOBAL; + set TOPN_FILTER_RATIO=0.5; + set disable_join_reorder=true; + set enable_runtime_filter_prune=false; + """ + + + explain { + sql "select * from orc_topn_lazy_mat_table order by id limit 10; " + contains("projectList:[id, name, value, active, score, file_id]") + contains("column_descs_lists[[`name` text NULL, `value` double NULL, `active` boolean NULL, `score` double 
NULL, `file_id` int NULL]]") + contains("locations: [[1, 2, 3, 4, 5]]") + contains("table_idxs: [[1, 2, 3, 4, 5]]") + contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__orc_topn_lazy_mat_table]") + } + + explain { + sql " select file_id,id from orc_topn_lazy_mat_table order by name limit 10; " + contains("projectList:[file_id, id]") + contains("column_descs_lists[[`id` int NULL, `file_id` int NULL]]") + contains("locations: [[1, 2]]") + contains("table_idxs: [[0, 5]]") + contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__orc_topn_lazy_mat_table]") + } + + explain { + sql """ select a.name,length(a.name),a.value,b.*,a.* from parquet_topn_lazy_mat_table as a + join orc_topn_lazy_mat_table as b on a.id = b.id order by a.name limit 10 """ + contains("projectList:[name, length(a.name), value, id, name, value, active, score, file_id, id, name, value, active, score, file_id]") + contains("column_descs_lists[[`name` text NULL, `value` double NULL, `active` boolean NULL, `score` double NULL, `file_id` int NULL], [`value` double NULL, `active` boolean NULL, `score` double NULL, `file_id` int NULL]]") + contains("locations: [[5, 6, 7, 8, 9], [10, 11, 12, 13]]") + contains("table_idxs: [[1, 2, 3, 4, 5], [2, 3, 4, 5]]") + contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__orc_topn_lazy_mat_table, __DORIS_GLOBAL_ROWID_COL__parquet_topn_lazy_mat_table]") + } + + runTopNLazyMatTests() + + + sql """ set enable_topn_lazy_materialization=false; """ + runTopNLazyMatTests() + + + + + } +} diff --git a/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy b/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy index a12ab8a4f78ccc..568bb632decbe9 100644 --- a/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy +++ b/regression-test/suites/external_table_p0/hive/test_transactional_hive.groovy @@ -53,6 +53,21 @@ suite("test_transactional_hive", "p0,external,hive,external_docker,external_dock """ } + def q01_par_limit = { + 
qt_q01_limit """ + select * from orc_full_acid_par order by id limit 3; + """ + qt_q02_limit """ + select value from orc_full_acid_par order by id limit 3; + """ + qt_q03_limit """ + select * from orc_full_acid_par where value = 'BB' order by id limit 3; + """ + qt_q04_limit """ + select * from orc_full_acid_par_empty limit 3; + """ + } + def test_acid = { sql """set enable_fallback_to_original_planner=false;""" @@ -77,6 +92,10 @@ suite("test_transactional_hive", "p0,external,hive,external_docker,external_dock qt_12 """ select * from orc_acid_major order by id """ qt_15 """ select * from orc_acid_major where id < 3 order by id """ qt_16 """ select * from orc_acid_major where id > 3 order by id """ + + qt_17 """ select * from orc_acid_major order by id limit 1 """ + qt_18 """ select * from orc_acid_major where id < 3 order by id limit 1 """ + qt_19 """ select * from orc_acid_major where id > 3 order by id limit 1""" } def test_acid_write = { @@ -159,6 +178,7 @@ suite("test_transactional_hive", "p0,external,hive,external_docker,external_dock test_acid_count() + q01_par_limit() sql """drop catalog if exists ${catalog_name}""" } finally { diff --git a/regression-test/suites/query_p0/sort/sort.groovy b/regression-test/suites/query_p0/sort/sort.groovy index 999e6743730acb..eb001b3e0a5893 100644 --- a/regression-test/suites/query_p0/sort/sort.groovy +++ b/regression-test/suites/query_p0/sort/sort.groovy @@ -20,6 +20,8 @@ // and modified by Doris. 
suite("sort") { + // this case tests deferred materialization, so topn_lazy_materialization is turned off + sql """set enable_topn_lazy_materialization=false;""" qt_sort_string_single_column """ select * from ( select '汇总' as a union all select '2022-01-01' as a ) a order by 1 """ qt_sort_string_multiple_columns """ select * from ( select '汇总' as a,1 as b union all select '2022-01-01' as a,1 as b ) a order by 1,2 """ qt_sort_string_on_fe """ select '汇总' > '2022-01-01' """ diff --git a/regression-test/suites/query_p0/sort/topn_2pr_rule.groovy b/regression-test/suites/query_p0/sort/topn_2pr_rule.groovy index 0df8ab260309c9..9fe4448a96944a 100644 --- a/regression-test/suites/query_p0/sort/topn_2pr_rule.groovy +++ b/regression-test/suites/query_p0/sort/topn_2pr_rule.groovy @@ -18,6 +18,8 @@ suite("topn_2pr_rule") { sql """set topn_opt_limit_threshold = 1024""" sql """set enable_two_phase_read_opt= true""" + // this case tests deferred materialization, so topn_lazy_materialization is turned off + sql """set enable_topn_lazy_materialization=false;""" def create_table = { table_name, key_type="DUPLICATE" -> sql "DROP TABLE IF EXISTS ${table_name}" diff --git a/regression-test/suites/query_p0/topn_lazy/ddl/customer_create.sql b/regression-test/suites/query_p0/topn_lazy/ddl/customer_create.sql new file mode 100644 index 00000000000000..71a8332ba485cd --- /dev/null +++ b/regression-test/suites/query_p0/topn_lazy/ddl/customer_create.sql @@ -0,0 +1,15 @@ +CREATE TABLE IF NOT EXISTS `customer` ( + `c_custkey` int(11) NOT NULL COMMENT "", + `c_name` varchar(26) NOT NULL COMMENT "", + `c_address` varchar(41) NOT NULL COMMENT "", + `c_city` varchar(11) NOT NULL COMMENT "", + `c_nation` varchar(16) NOT NULL COMMENT "", + `c_region` varchar(13) NOT NULL COMMENT "", + `c_phone` varchar(16) NOT NULL COMMENT "", + `c_mktsegment` varchar(11) NOT NULL COMMENT "" +) +DUPLICATE KEY (`c_custkey`) +DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 1 +PROPERTIES ( +"replication_num" = "1" +); 
\ No newline at end of file diff --git a/regression-test/suites/query_p0/topn_lazy/ddl/dates_create.sql b/regression-test/suites/query_p0/topn_lazy/ddl/dates_create.sql new file mode 100644 index 00000000000000..574c608b48b1e1 --- /dev/null +++ b/regression-test/suites/query_p0/topn_lazy/ddl/dates_create.sql @@ -0,0 +1,24 @@ +CREATE TABLE IF NOT EXISTS `date` ( + `d_datekey` int(11) NOT NULL COMMENT "", + `d_date` varchar(20) NOT NULL COMMENT "", + `d_dayofweek` varchar(10) NOT NULL COMMENT "", + `d_month` varchar(11) NOT NULL COMMENT "", + `d_year` int(11) NOT NULL COMMENT "", + `d_yearmonthnum` int(11) NOT NULL COMMENT "", + `d_yearmonth` varchar(9) NOT NULL COMMENT "", + `d_daynuminweek` int(11) NOT NULL COMMENT "", + `d_daynuminmonth` int(11) NOT NULL COMMENT "", + `d_daynuminyear` int(11) NOT NULL COMMENT "", + `d_monthnuminyear` int(11) NOT NULL COMMENT "", + `d_weeknuminyear` int(11) NOT NULL COMMENT "", + `d_sellingseason` varchar(14) NOT NULL COMMENT "", + `d_lastdayinweekfl` int(11) NOT NULL COMMENT "", + `d_lastdayinmonthfl` int(11) NOT NULL COMMENT "", + `d_holidayfl` int(11) NOT NULL COMMENT "", + `d_weekdayfl` int(11) NOT NULL COMMENT "" +) +DUPLICATE KEY (`d_datekey`) +DISTRIBUTED BY HASH(`d_datekey`) BUCKETS 1 +PROPERTIES ( +"replication_num" = "1" +); \ No newline at end of file diff --git a/regression-test/suites/query_p0/topn_lazy/ddl/lineorder_create.sql b/regression-test/suites/query_p0/topn_lazy/ddl/lineorder_create.sql new file mode 100644 index 00000000000000..0653220101ae09 --- /dev/null +++ b/regression-test/suites/query_p0/topn_lazy/ddl/lineorder_create.sql @@ -0,0 +1,24 @@ +CREATE TABLE IF NOT EXISTS `lineorder` ( + `lo_orderkey` bigint(20) NOT NULL COMMENT "", + `lo_linenumber` bigint(20) NOT NULL COMMENT "", + `lo_custkey` int(11) NOT NULL COMMENT "", + `lo_partkey` int(11) NOT NULL COMMENT "", + `lo_suppkey` int(11) NOT NULL COMMENT "", + `lo_orderdate` int(11) NOT NULL COMMENT "", + `lo_orderpriority` varchar(16) NOT NULL COMMENT "", 
+ `lo_shippriority` int(11) NOT NULL COMMENT "", + `lo_quantity` bigint(20) NOT NULL COMMENT "", + `lo_extendedprice` bigint(20) NOT NULL COMMENT "", + `lo_ordtotalprice` bigint(20) NOT NULL COMMENT "", + `lo_discount` bigint(20) NOT NULL COMMENT "", + `lo_revenue` bigint(20) NOT NULL COMMENT "", + `lo_supplycost` bigint(20) NOT NULL COMMENT "", + `lo_tax` bigint(20) NOT NULL COMMENT "", + `lo_commitdate` bigint(20) NOT NULL COMMENT "", + `lo_shipmode` varchar(11) NOT NULL COMMENT "" +) +DUPLICATE KEY (`lo_orderkey`, `lo_linenumber`) +DISTRIBUTED BY HASH(`lo_orderkey`) BUCKETS 1 +PROPERTIES ( +"replication_num" = "1" +); \ No newline at end of file diff --git a/regression-test/suites/query_p0/topn_lazy/ddl/part_create.sql b/regression-test/suites/query_p0/topn_lazy/ddl/part_create.sql new file mode 100644 index 00000000000000..c18b21be2a9b9f --- /dev/null +++ b/regression-test/suites/query_p0/topn_lazy/ddl/part_create.sql @@ -0,0 +1,16 @@ +CREATE TABLE IF NOT EXISTS `part` ( + `p_partkey` int(11) NOT NULL COMMENT "", + `p_name` varchar(23) NOT NULL COMMENT "", + `p_mfgr` varchar(7) NOT NULL COMMENT "", + `p_category` varchar(8) NOT NULL COMMENT "", + `p_brand` varchar(10) NOT NULL COMMENT "", + `p_color` varchar(12) NOT NULL COMMENT "", + `p_type` varchar(26) NOT NULL COMMENT "", + `p_size` int(11) NOT NULL COMMENT "", + `p_container` varchar(11) NOT NULL COMMENT "" +) +DUPLICATE KEY (`p_partkey`) +DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 1 +PROPERTIES ( +"replication_num" = "1" +); \ No newline at end of file diff --git a/regression-test/suites/query_p0/topn_lazy/ddl/ssb.tables.sql b/regression-test/suites/query_p0/topn_lazy/ddl/ssb.tables.sql new file mode 100644 index 00000000000000..3d143944f93b48 --- /dev/null +++ b/regression-test/suites/query_p0/topn_lazy/ddl/ssb.tables.sql @@ -0,0 +1,89 @@ +CREATE TABLE IF NOT EXISTS `customer` ( + `c_custkey` int(11) NOT NULL COMMENT "", + `c_name` varchar(26) NOT NULL COMMENT "", + `c_address` varchar(41) NOT NULL COMMENT 
"", + `c_city` varchar(11) NOT NULL COMMENT "", + `c_nation` varchar(16) NOT NULL COMMENT "", + `c_region` varchar(13) NOT NULL COMMENT "", + `c_phone` varchar(16) NOT NULL COMMENT "", + `c_mktsegment` varchar(11) NOT NULL COMMENT "" +) +DUPLICATE KEY (`c_custkey`) +DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 1 +PROPERTIES ( +"replication_num" = "1" +);CREATE TABLE IF NOT EXISTS `date` ( + `d_datekey` int(11) NOT NULL COMMENT "", + `d_date` varchar(20) NOT NULL COMMENT "", + `d_dayofweek` varchar(10) NOT NULL COMMENT "", + `d_month` varchar(11) NOT NULL COMMENT "", + `d_year` int(11) NOT NULL COMMENT "", + `d_yearmonthnum` int(11) NOT NULL COMMENT "", + `d_yearmonth` varchar(9) NOT NULL COMMENT "", + `d_daynuminweek` int(11) NOT NULL COMMENT "", + `d_daynuminmonth` int(11) NOT NULL COMMENT "", + `d_daynuminyear` int(11) NOT NULL COMMENT "", + `d_monthnuminyear` int(11) NOT NULL COMMENT "", + `d_weeknuminyear` int(11) NOT NULL COMMENT "", + `d_sellingseason` varchar(14) NOT NULL COMMENT "", + `d_lastdayinweekfl` int(11) NOT NULL COMMENT "", + `d_lastdayinmonthfl` int(11) NOT NULL COMMENT "", + `d_holidayfl` int(11) NOT NULL COMMENT "", + `d_weekdayfl` int(11) NOT NULL COMMENT "" +) +DUPLICATE KEY (`d_datekey`) +DISTRIBUTED BY HASH(`d_datekey`) BUCKETS 1 +PROPERTIES ( +"replication_num" = "1" +);CREATE TABLE IF NOT EXISTS `lineorder` ( + `lo_orderkey` bigint(20) NOT NULL COMMENT "", + `lo_linenumber` bigint(20) NOT NULL COMMENT "", + `lo_custkey` int(11) NOT NULL COMMENT "", + `lo_partkey` int(11) NOT NULL COMMENT "", + `lo_suppkey` int(11) NOT NULL COMMENT "", + `lo_orderdate` int(11) NOT NULL COMMENT "", + `lo_orderpriority` varchar(16) NOT NULL COMMENT "", + `lo_shippriority` int(11) NOT NULL COMMENT "", + `lo_quantity` bigint(20) NOT NULL COMMENT "", + `lo_extendedprice` bigint(20) NOT NULL COMMENT "", + `lo_ordtotalprice` bigint(20) NOT NULL COMMENT "", + `lo_discount` bigint(20) NOT NULL COMMENT "", + `lo_revenue` bigint(20) NOT NULL COMMENT "", + `lo_supplycost` 
bigint(20) NOT NULL COMMENT "", + `lo_tax` bigint(20) NOT NULL COMMENT "", + `lo_commitdate` bigint(20) NOT NULL COMMENT "", + `lo_shipmode` varchar(11) NOT NULL COMMENT "" +) +DUPLICATE KEY (`lo_orderkey`, `lo_linenumber`) +DISTRIBUTED BY HASH(`lo_orderkey`) BUCKETS 1 +PROPERTIES ( +"replication_num" = "1" +);CREATE TABLE IF NOT EXISTS `part` ( + `p_partkey` int(11) NOT NULL COMMENT "", + `p_name` varchar(23) NOT NULL COMMENT "", + `p_mfgr` varchar(7) NOT NULL COMMENT "", + `p_category` varchar(8) NOT NULL COMMENT "", + `p_brand` varchar(10) NOT NULL COMMENT "", + `p_color` varchar(12) NOT NULL COMMENT "", + `p_type` varchar(26) NOT NULL COMMENT "", + `p_size` int(11) NOT NULL COMMENT "", + `p_container` varchar(11) NOT NULL COMMENT "" +) +DUPLICATE KEY (`p_partkey`) +DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 1 +PROPERTIES ( +"replication_num" = "1" +);CREATE TABLE IF NOT EXISTS `supplier` ( + `s_suppkey` int(11) NOT NULL COMMENT "", + `s_name` varchar(26) NOT NULL COMMENT "", + `s_address` varchar(26) NOT NULL COMMENT "", + `s_city` varchar(11) NOT NULL COMMENT "", + `s_nation` varchar(16) NOT NULL COMMENT "", + `s_region` varchar(13) NOT NULL COMMENT "", + `s_phone` varchar(16) NOT NULL COMMENT "" +) +DUPLICATE KEY (`s_suppkey`) +DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 1 +PROPERTIES ( +"replication_num" = "1" +); \ No newline at end of file diff --git a/regression-test/suites/query_p0/topn_lazy/ddl/supplier_create.sql b/regression-test/suites/query_p0/topn_lazy/ddl/supplier_create.sql new file mode 100644 index 00000000000000..4eabcf21c3db36 --- /dev/null +++ b/regression-test/suites/query_p0/topn_lazy/ddl/supplier_create.sql @@ -0,0 +1,14 @@ +CREATE TABLE IF NOT EXISTS `supplier` ( + `s_suppkey` int(11) NOT NULL COMMENT "", + `s_name` varchar(26) NOT NULL COMMENT "", + `s_address` varchar(26) NOT NULL COMMENT "", + `s_city` varchar(11) NOT NULL COMMENT "", + `s_nation` varchar(16) NOT NULL COMMENT "", + `s_region` varchar(13) NOT NULL COMMENT "", + `s_phone` 
varchar(16) NOT NULL COMMENT "" +) +DUPLICATE KEY (`s_suppkey`) +DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 1 +PROPERTIES ( +"replication_num" = "1" +); \ No newline at end of file diff --git a/regression-test/suites/query_p0/topn_lazy/load.groovy b/regression-test/suites/query_p0/topn_lazy/load.groovy new file mode 100644 index 00000000000000..ec6eba21ced993 --- /dev/null +++ b/regression-test/suites/query_p0/topn_lazy/load.groovy @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Most of the cases are copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases +// and modified by Doris. + +// Note: To filter out tables from sql files, use the following one-liner command +// sed -nr 's/.*tables: (.*)$/\1/gp' /path/to/*.sql | sed -nr 's/,/\n/gp' | sort | uniq +suite("load") { + + // ssb_sf1_p1 was written to test that unique key tables merge correctly: it creates unique key + // tables with bucket num 1 so that loading creates many rowsets and triggers the merge process. + // NOTE(review): the tables loaded here (ddl/ssb.tables.sql) are DUPLICATE KEY; this note looks stale, confirm. 
+ + def tables = ["customer", "lineorder", "part", "date", "supplier"] + def columns = ["""c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use""", + """lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority, + lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount, + lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy""", + """p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy""", + """d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth, + d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear, + d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy""", + """s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy"""] + + sql new File("""${context.file.parent}/ddl/ssb.tables.sql""").text + + def i = 0 + for (String tableName in tables) { + streamLoad { + // a default db 'regression_test' is specified in + // ${DORIS_HOME}/conf/regression-conf.groovy + table tableName + + // default label is UUID: + // set 'label' UUID.randomUUID().toString() + + // default column_separator is specified in doris fe config, usually '\t'. + // this line changes it to '|' + set 'column_separator', '|' + set 'compress_type', 'GZ' + set 'columns', columns[i] + // stream load an http source: the gzip-compressed .tbl file of this table, + // located under the base URL returned by getS3Url() + file """${getS3Url()}/regression/ssb/sf0.1/${tableName}.tbl.gz""" + + time 10000 // limit inflight 10s + + // by default the stream load action checks the result, including Success status and NumberTotalRows == NumberLoadedRows + + // if a check callback is declared, the default check conditions are ignored, 
+ // so the callback must check every condition itself + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + i++ + } + + sql """ sync """ +} diff --git a/regression-test/suites/query_p0/topn_lazy/topn_lazy.groovy b/regression-test/suites/query_p0/topn_lazy/topn_lazy.groovy new file mode 100644 index 00000000000000..1c56a0154816fe --- /dev/null +++ b/regression-test/suites/query_p0/topn_lazy/topn_lazy.groovy @@ -0,0 +1,208 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("topn_lazy") { + sql """ + set enable_topn_lazy_materialization=true; + set runtime_filter_mode=GLOBAL; + set TOPN_FILTER_RATIO=0.5; + set disable_join_reorder=true; + """ + // ========single table =========== + //single table select all slots + explain { + sql "select * from lineorder where lo_orderkey>100 order by lo_orderkey limit 1; " + contains("projectList:[lo_orderkey, lo_linenumber, lo_custkey, lo_partkey, lo_suppkey, lo_orderdate, lo_orderpriority, lo_shippriority, lo_quantity, lo_extendedprice, lo_ordtotalprice, lo_discount, lo_revenue, lo_supplycost, lo_tax, lo_commitdate, lo_shipmode]") + contains("column_descs_lists[[`lo_linenumber` bigint NOT NULL, `lo_custkey` int NOT NULL, `lo_partkey` int NOT NULL, `lo_suppkey` int NOT NULL, `lo_orderdate` int NOT NULL, `lo_orderpriority` varchar(16) NOT NULL, `lo_shippriority` int NOT NULL, `lo_quantity` bigint NOT NULL, `lo_extendedprice` bigint NOT NULL, `lo_ordtotalprice` bigint NOT NULL, `lo_discount` bigint NOT NULL, `lo_revenue` bigint NOT NULL, `lo_supplycost` bigint NOT NULL, `lo_tax` bigint NOT NULL, `lo_commitdate` bigint NOT NULL, `lo_shipmode` varchar(11) NOT NULL]]") + contains("locations: [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]]") + contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__lineorder]") + } + + + // single table select some slots + explain { + sql "select lo_suppkey, lo_commitdate from lineorder where lo_orderkey>100 order by lo_orderkey limit 1; " + contains("projectList:[lo_suppkey, lo_commitdate]") + contains("column_descs_lists[[`lo_suppkey` int NOT NULL, `lo_commitdate` bigint NOT NULL]]") + contains("locations: [[1, 2]]") + contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__lineorder]") + } + + // switch output slot order + explain { + sql("select lo_commitdate, lo_suppkey from lineorder where lo_orderkey>100 order by lo_orderkey limit 1; ") + contains("projectList:[lo_commitdate, lo_suppkey]") + contains("column_descs_lists[[`lo_suppkey` int NOT NULL, `lo_commitdate` 
bigint NOT NULL]]") + contains("locations: [[1, 2]]") + contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__lineorder]") + } + + + //============ join ================ + explain { + sql("select * from lineorder, date where d_datekey > 0 and lo_orderdate = d_datekey order by d_date limit 5;") + contains("projectList:[lo_orderkey, lo_linenumber, lo_custkey, lo_partkey, lo_suppkey, lo_orderdate, lo_orderpriority, lo_shippriority, lo_quantity, lo_extendedprice, lo_ordtotalprice, lo_discount, lo_revenue, lo_supplycost, lo_tax, lo_commitdate, lo_shipmode, d_datekey, d_date, d_dayofweek, d_month, d_year, d_yearmonthnum, d_yearmonth, d_daynuminweek, d_daynuminmonth, d_daynuminyear, d_monthnuminyear, d_weeknuminyear, d_sellingseason, d_lastdayinweekfl, d_lastdayinmonthfl, d_holidayfl, d_weekdayfl]") + contains("column_descs_lists[[`lo_orderkey` bigint NOT NULL, `lo_linenumber` bigint NOT NULL, `lo_custkey` int NOT NULL, `lo_partkey` int NOT NULL, `lo_suppkey` int NOT NULL, `lo_orderpriority` varchar(16) NOT NULL, `lo_shippriority` int NOT NULL, `lo_quantity` bigint NOT NULL, `lo_extendedprice` bigint NOT NULL, `lo_ordtotalprice` bigint NOT NULL, `lo_discount` bigint NOT NULL, `lo_revenue` bigint NOT NULL, `lo_supplycost` bigint NOT NULL, `lo_tax` bigint NOT NULL, `lo_commitdate` bigint NOT NULL, `lo_shipmode` varchar(11) NOT NULL], [`d_dayofweek` varchar(10) NOT NULL, `d_month` varchar(11) NOT NULL, `d_year` int NOT NULL, `d_yearmonthnum` int NOT NULL, `d_yearmonth` varchar(9) NOT NULL, `d_daynuminweek` int NOT NULL, `d_daynuminmonth` int NOT NULL, `d_daynuminyear` int NOT NULL, `d_monthnuminyear` int NOT NULL, `d_weeknuminyear` int NOT NULL, `d_sellingseason` varchar(14) NOT NULL, `d_lastdayinweekfl` int NOT NULL, `d_lastdayinmonthfl` int NOT NULL, `d_holidayfl` int NOT NULL, `d_weekdayfl` int NOT NULL]]") + contains("locations: [[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]]") + contains("row_ids: 
[__DORIS_GLOBAL_ROWID_COL__lineorder, __DORIS_GLOBAL_ROWID_COL__date]") + } + + + explain { + sql "select lineorder.*, date.* from lineorder, date where d_datekey > 0 and lo_orderdate = d_datekey order by d_date limit 5;" + contains("projectList:[lo_orderkey, lo_linenumber, lo_custkey, lo_partkey, lo_suppkey, lo_orderdate, lo_orderpriority, lo_shippriority, lo_quantity, lo_extendedprice, lo_ordtotalprice, lo_discount, lo_revenue, lo_supplycost, lo_tax, lo_commitdate, lo_shipmode, d_datekey, d_date, d_dayofweek, d_month, d_year, d_yearmonthnum, d_yearmonth, d_daynuminweek, d_daynuminmonth, d_daynuminyear, d_monthnuminyear, d_weeknuminyear, d_sellingseason, d_lastdayinweekfl, d_lastdayinmonthfl, d_holidayfl, d_weekdayfl]") + contains("column_descs_lists[[`lo_orderkey` bigint NOT NULL, `lo_linenumber` bigint NOT NULL, `lo_custkey` int NOT NULL, `lo_partkey` int NOT NULL, `lo_suppkey` int NOT NULL, `lo_orderpriority` varchar(16) NOT NULL, `lo_shippriority` int NOT NULL, `lo_quantity` bigint NOT NULL, `lo_extendedprice` bigint NOT NULL, `lo_ordtotalprice` bigint NOT NULL, `lo_discount` bigint NOT NULL, `lo_revenue` bigint NOT NULL, `lo_supplycost` bigint NOT NULL, `lo_tax` bigint NOT NULL, `lo_commitdate` bigint NOT NULL, `lo_shipmode` varchar(11) NOT NULL], [`d_dayofweek` varchar(10) NOT NULL, `d_month` varchar(11) NOT NULL, `d_year` int NOT NULL, `d_yearmonthnum` int NOT NULL, `d_yearmonth` varchar(9) NOT NULL, `d_daynuminweek` int NOT NULL, `d_daynuminmonth` int NOT NULL, `d_daynuminyear` int NOT NULL, `d_monthnuminyear` int NOT NULL, `d_weeknuminyear` int NOT NULL, `d_sellingseason` varchar(14) NOT NULL, `d_lastdayinweekfl` int NOT NULL, `d_lastdayinmonthfl` int NOT NULL, `d_holidayfl` int NOT NULL, `d_weekdayfl` int NOT NULL]]") + contains("locations: [[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]]") + contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__lineorder, __DORIS_GLOBAL_ROWID_COL__date]") 
+ } + + explain{ + sql "select date.*, lineorder.* from date, lineorder where d_datekey > 0 and lo_orderdate = d_datekey order by d_date limit 5;" + contains("projectList:[d_datekey, d_date, d_dayofweek, d_month, d_year, d_yearmonthnum, d_yearmonth, d_daynuminweek, d_daynuminmonth, d_daynuminyear, d_monthnuminyear, d_weeknuminyear, d_sellingseason, d_lastdayinweekfl, d_lastdayinmonthfl, d_holidayfl, d_weekdayfl, lo_orderkey, lo_linenumber, lo_custkey, lo_partkey, lo_suppkey, lo_orderdate, lo_orderpriority, lo_shippriority, lo_quantity, lo_extendedprice, lo_ordtotalprice, lo_discount, lo_revenue, lo_supplycost, lo_tax, lo_commitdate, lo_shipmode]") + contains("column_descs_lists[[`d_dayofweek` varchar(10) NOT NULL, `d_month` varchar(11) NOT NULL, `d_year` int NOT NULL, `d_yearmonthnum` int NOT NULL, `d_yearmonth` varchar(9) NOT NULL, `d_daynuminweek` int NOT NULL, `d_daynuminmonth` int NOT NULL, `d_daynuminyear` int NOT NULL, `d_monthnuminyear` int NOT NULL, `d_weeknuminyear` int NOT NULL, `d_sellingseason` varchar(14) NOT NULL, `d_lastdayinweekfl` int NOT NULL, `d_lastdayinmonthfl` int NOT NULL, `d_holidayfl` int NOT NULL, `d_weekdayfl` int NOT NULL], [`lo_orderkey` bigint NOT NULL, `lo_linenumber` bigint NOT NULL, `lo_custkey` int NOT NULL, `lo_partkey` int NOT NULL, `lo_suppkey` int NOT NULL, `lo_orderpriority` varchar(16) NOT NULL, `lo_shippriority` int NOT NULL, `lo_quantity` bigint NOT NULL, `lo_extendedprice` bigint NOT NULL, `lo_ordtotalprice` bigint NOT NULL, `lo_discount` bigint NOT NULL, `lo_revenue` bigint NOT NULL, `lo_supplycost` bigint NOT NULL, `lo_tax` bigint NOT NULL, `lo_commitdate` bigint NOT NULL, `lo_shipmode` varchar(11) NOT NULL]]") + contains("locations: [[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]]") + contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__date, __DORIS_GLOBAL_ROWID_COL__lineorder]") + } + + //======multi topn==== + explain { + sql """ select * + from ( + 
select * from lineorder order by lo_custkey limit 100 + ) T join customer on c_custkey=lo_partkey + order by c_name limit 1; + """ + multiContains("VMaterializeNode", 1) + } + + explain { + sql """ select * + from + customer left semi join ( + select * from lineorder order by lo_custkey limit 100 + ) T on c_custkey=lo_partkey + order by c_name limit 1; + """ + multiContains("VMaterializeNode", 1) + } + + explain { + sql """ select * + from + customer left semi join ( + select * from lineorder order by lo_custkey limit 100 + ) T on c_custkey=lo_partkey + order by c_name limit 1; + """ + multiContains("VMaterializeNode", 1) + } + + qt_test_lazy1 """select * from date order by d_date limit 10;""" + + qt_test_lazy2 """SELECT d_datekey, d_date, d_dayofweek, d_month, d_year, d_yearmonthnum, d_daynuminweek, d_monthnuminyear, d_sellingseason FROM date ORDER BY d_date LIMIT 10;""" + + // test topn with row store + sql """ DROP TABLE IF EXISTS date_row_store """ + sql """ + CREATE TABLE `date_row_store` ( + `d_datekey` int NOT NULL, + `d_date` varchar(20) NOT NULL, + `d_dayofweek` varchar(10) NOT NULL, + `d_month` varchar(11) NOT NULL, + `d_year` int NOT NULL, + `d_yearmonthnum` int NOT NULL, + `d_yearmonth` varchar(9) NOT NULL, + `d_daynuminweek` int NOT NULL, + `d_daynuminmonth` int NOT NULL, + `d_daynuminyear` int NOT NULL, + `d_monthnuminyear` int NOT NULL, + `d_weeknuminyear` int NOT NULL, + `d_sellingseason` varchar(14) NOT NULL, + `d_lastdayinweekfl` int NOT NULL, + `d_lastdayinmonthfl` int NOT NULL, + `d_holidayfl` int NOT NULL, + `d_weekdayfl` int NOT NULL + ) ENGINE=OLAP + DUPLICATE KEY(`d_datekey`) + DISTRIBUTED BY HASH(`d_datekey`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "store_row_column" = "true" + ); """ + sql """ INSERT INTO date_row_store select * from date; """ + + qt_test_lazy3 """ select * from date_row_store order by d_date limit 10; """ + + qt_test_lazy4 """SELECT d_datekey, d_date, d_dayofweek, d_month, 
d_year, d_yearmonthnum, d_daynuminweek, d_monthnuminyear, d_sellingseason FROM date_row_store ORDER BY d_date LIMIT 10;""" + + // Add new test cases for LEFT JOIN with different column orders + sql """ DROP TABLE IF EXISTS users """ + sql """ CREATE TABLE users ( + user_id INT, + user_name VARCHAR(50) + ) DISTRIBUTED BY HASH(user_id) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); """ + + sql """ DROP TABLE IF EXISTS orders """ + sql """ CREATE TABLE orders ( + order_id INT, + user_id INT, + order_amount DECIMAL(10,2) + ) DISTRIBUTED BY HASH(user_id) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); """ + + sql """ INSERT INTO users VALUES + (1, 'Alice'), + (2, 'Bob'), + (3, 'Charlie'), + (4, 'David'), + (5, 'Eve'); """ + + sql """ INSERT INTO orders VALUES + (101, 1, 100.50), + (102, 2, 200.75), + (103, 3, 150.00); """ + + // Test case 1: Original column order + qt_test_lazy5 """ + SELECT u.user_id, u.user_name, o.order_id, o.order_amount + FROM users u LEFT JOIN orders o ON u.user_id = o.user_id + ORDER BY u.user_id LIMIT 5; + """ + + // Test case 2: Different column order + qt_test_lazy6 """ + SELECT o.order_amount, u.user_name, o.order_id, u.user_id + FROM users u LEFT JOIN orders o ON u.user_id = o.user_id + ORDER BY u.user_id LIMIT 5; + """ + + // Test case 3: Another column order variation + qt_test_lazy7 """ + SELECT u.user_name, o.order_id, u.user_id, o.order_amount + FROM users u LEFT JOIN orders o ON u.user_id = o.user_id + ORDER BY u.user_id LIMIT 5; + """ + + // Cleanup tables + sql """ DROP TABLE IF EXISTS users """ + sql """ DROP TABLE IF EXISTS orders """ +} diff --git a/regression-test/suites/query_p0/topn_lazy/topn_lazy_on_data_model.groovy b/regression-test/suites/query_p0/topn_lazy/topn_lazy_on_data_model.groovy new file mode 100644 index 00000000000000..a2ee458f92b067 --- /dev/null +++ b/regression-test/suites/query_p0/topn_lazy/topn_lazy_on_data_model.groovy @@ -0,0 +1,80 
@@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("topn_lazy_on_data_model") { + // mor: unique-key table with merge-on-write disabled; key column user_id is expected NOT to be lazily materialized in the TopN plan + sql """ + drop table if exists mor; + CREATE TABLE mor + ( + `user_id` LARGEINT NOT NULL, + `username` VARCHAR(50) NOT NULL, + age int + ) + UNIQUE KEY(user_id, username) + DISTRIBUTED BY HASH(`user_id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "false" + ); + + insert into mor values ( 1, 'a', 10),(1,'b', 20); + """ + + qt_shape "explain shape plan select * from mor order by username limit 1" + + + // mow: unique-key table with merge-on-write enabled; key column user_id is expected to be lazily materialized in the TopN plan + sql """ + drop table if exists mow; + CREATE TABLE IF NOT EXISTS mow + ( + `user_id` LARGEINT NOT NULL, + `username` VARCHAR(50) NOT NULL, + age int + ) + UNIQUE KEY(user_id, username) + DISTRIBUTED BY HASH(`user_id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true" + ); + + insert into mow values ( 1, 'a', 10),(1,'b', 20); + """ + + qt_shape "explain shape plan select * from mow order by username limit 1" + + // agg: aggregate-key table (age uses REPLACE); key column user_id is expected to be lazily materialized in the TopN plan + sql """ + drop table if
exists agg; + CREATE TABLE IF NOT EXISTS agg + ( + `user_id` LARGEINT NOT NULL, + `username` VARCHAR(50) NOT NULL, + age int REPLACE + ) + aggregate KEY(user_id, username) + DISTRIBUTED BY HASH(`user_id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + insert into agg values ( 1, 'a', 10),(1,'b', 20); + """ + + qt_shape "explain shape plan select * from agg order by username limit 1" +} \ No newline at end of file