From e8cbbcdc6c6433371e6860b1c0a58d95bb2c7c9c Mon Sep 17 00:00:00 2001 From: minghong Date: Fri, 27 Jun 2025 16:19:48 +0800 Subject: [PATCH 1/2] set slot order shuffle --- .../post/runtimefilterv2/RuntimeFilterV2Generator.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/runtimefilterv2/RuntimeFilterV2Generator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/runtimefilterv2/RuntimeFilterV2Generator.java index c9fee46a5ef8d8..0b9d810ac7fce2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/runtimefilterv2/RuntimeFilterV2Generator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/runtimefilterv2/RuntimeFilterV2Generator.java @@ -19,6 +19,7 @@ import org.apache.doris.nereids.CascadesContext; import org.apache.doris.nereids.processor.post.PlanPostProcessor; +import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.AbstractPlan; import org.apache.doris.nereids.trees.plans.Plan; @@ -61,14 +62,16 @@ private void computeRuntimeFilterForIntersectAndExcept(PhysicalSetOperation setO && child0.getStats().getRowCount() < ConnectContext.get().getSessionVariable().runtimeFilterMaxBuildRowCount) { for (int slotIdx : chooseSourceSlots(setOp)) { + Expression sourceExpression = setOp.getRegularChildrenOutputs().get(0).get(slotIdx); for (int childId = 1; childId < setOp.children().size(); childId++) { Plan child = setOp.children().get(childId); + Expression targetExpression = setOp.getRegularChildrenOutputs().get(childId).get(slotIdx); Statistics stats = child0.getStats(); long buildNdvOrRowCount = -1; if (stats != null) { buildNdvOrRowCount = (long) stats.getRowCount(); ColumnStatistic colStats = stats.findColumnStatistics( - setOp.child(0).getOutput().get(slotIdx)); + sourceExpression); if (colStats != null && !colStats.isUnKnown) { buildNdvOrRowCount = Math.max(1, (long) colStats.ndv); } @@ -76,10 +79,10 @@ private void computeRuntimeFilterForIntersectAndExcept(PhysicalSetOperation setO PushDownContext pushDownContext = new PushDownContext( context.getRuntimeFilterV2Context(), setOp, - setOp.child(0).getOutput().get(slotIdx), + sourceExpression, buildNdvOrRowCount, slotIdx, - setOp.child(childId).getOutput().get(slotIdx)); + targetExpression); child.accept(PushDownVisitor.INSTANCE, pushDownContext); } } From d530eac20620066710dbf185ecf0d491770a02a0 Mon Sep 17 00:00:00 2001 From: minghong Date: Fri, 27 Jun 2025 16:35:52 +0800 Subject: [PATCH 2/2] fix --- .../shape_check/tpcds_sf100/noStatsRfPrune/query38.out | 10 +++++----- .../shape_check/tpcds_sf100/no_stats_shape/query38.out | 10 +++++----- .../data/shape_check/tpcds_sf100/rf_prune/query38.out | 10 +++++----- .../data/shape_check/tpcds_sf100/rf_prune/query87.out | 10 +++++----- .../data/shape_check/tpcds_sf100/shape/query38.out | 10 +++++----- .../data/shape_check/tpcds_sf100/shape/query87.out | 10 +++++----- 6 files changed, 30 insertions(+), 30 deletions(-) diff --git a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query38.out b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query38.out index 71a88a9dec17a4..550170a1536437 100644 --- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query38.out +++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query38.out @@ -7,7 +7,7 @@ PhysicalResultSink --------PhysicalDistribute[DistributionSpecGather] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------PhysicalIntersect RFV2: RF6[d_date->d_date] RF7[d_date->ss_customer_sk] RF8[d_date->c_customer_sk] +--------------PhysicalIntersect RFV2: RF6[c_last_name->c_last_name] RF7[c_last_name->c_last_name] ----------------PhysicalDistribute[DistributionSpecHash] ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() --------------------hashAgg[GLOBAL] @@ -32,8 +32,8 @@ PhysicalResultSink --------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 ------------------------------PhysicalProject --------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) -----------------------------------PhysicalOlapScan[date_dim] RFV2: RF6 ---------------------PhysicalOlapScan[customer] +----------------------------------PhysicalOlapScan[date_dim] +--------------------PhysicalOlapScan[customer] RFV2: RF6 ----------------PhysicalDistribute[DistributionSpecHash] ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() --------------------hashAgg[GLOBAL] @@ -42,9 +42,9 @@ PhysicalResultSink --------------------------PhysicalProject ----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk] ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RFV2: RF7 +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 ------------------------------PhysicalProject --------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) ----------------------------------PhysicalOlapScan[date_dim] ---------------------PhysicalOlapScan[customer] RFV2: RF8 +--------------------PhysicalOlapScan[customer] RFV2: RF7 diff --git a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query38.out b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query38.out index ef66d63d8dd0e8..6dff99a09a656a 100644 --- a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query38.out +++ b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query38.out @@ -7,7 +7,7 @@ PhysicalResultSink --------PhysicalDistribute[DistributionSpecGather] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------PhysicalIntersect RFV2: RF6[d_date->d_date] RF7[d_date->ss_customer_sk] RF8[d_date->c_customer_sk] +--------------PhysicalIntersect RFV2: RF6[c_last_name->c_last_name] RF7[c_last_name->c_last_name] ----------------PhysicalDistribute[DistributionSpecHash] ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ws_bill_customer_sk] --------------------hashAgg[GLOBAL] @@ -32,8 +32,8 @@ PhysicalResultSink --------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 ------------------------------PhysicalProject --------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) -----------------------------------PhysicalOlapScan[date_dim] RFV2: RF6 ---------------------PhysicalOlapScan[customer] +----------------------------------PhysicalOlapScan[date_dim] +--------------------PhysicalOlapScan[customer] RFV2: RF6 ----------------PhysicalDistribute[DistributionSpecHash] ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ss_customer_sk] --------------------hashAgg[GLOBAL] @@ -42,9 +42,9 @@ PhysicalResultSink --------------------------PhysicalProject ----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk] ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5 RFV2: RF7 +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5 ------------------------------PhysicalProject --------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) ----------------------------------PhysicalOlapScan[date_dim] ---------------------PhysicalOlapScan[customer] RFV2: RF8 +--------------------PhysicalOlapScan[customer] RFV2: RF7 diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query38.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query38.out index 2b08898c609613..b06a00731068b8 100644 --- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query38.out +++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query38.out @@ -7,7 +7,7 @@ PhysicalResultSink --------PhysicalDistribute[DistributionSpecGather] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------PhysicalIntersect RFV2: RF6[d_date->d_date] RF7[d_date->ss_customer_sk] RF8[d_date->c_customer_sk] +--------------PhysicalIntersect RFV2: RF6[c_last_name->c_last_name] RF7[c_last_name->c_last_name] ----------------PhysicalDistribute[DistributionSpecHash] ------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() --------------------hashAgg[GLOBAL] @@ -32,8 +32,8 @@ PhysicalResultSink --------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 ------------------------------PhysicalProject --------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) -----------------------------------PhysicalOlapScan[date_dim] RFV2: RF6 ---------------------PhysicalOlapScan[customer] +----------------------------------PhysicalOlapScan[date_dim] +--------------------PhysicalOlapScan[customer] RFV2: RF6 ----------------PhysicalDistribute[DistributionSpecHash] ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() --------------------hashAgg[GLOBAL] @@ -42,9 +42,9 @@ PhysicalResultSink --------------------------PhysicalProject ----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk] ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RFV2: RF7 +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 ------------------------------PhysicalProject --------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) ----------------------------------PhysicalOlapScan[date_dim] ---------------------PhysicalOlapScan[customer] RFV2: RF8 +--------------------PhysicalOlapScan[customer] RFV2: RF7 diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query87.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query87.out index dd65bf922419fa..96d989c4955353 100644 --- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query87.out +++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query87.out @@ -5,7 +5,7 @@ PhysicalResultSink ----PhysicalDistribute[DistributionSpecGather] ------hashAgg[LOCAL] --------PhysicalProject -----------PhysicalExcept RFV2: RF6[ss_customer_sk->d_date] RF7[ss_customer_sk->d_date] +----------PhysicalExcept RFV2: RF6[c_last_name->c_last_name] RF7[c_last_name->c_last_name] ------------PhysicalDistribute[DistributionSpecHash] --------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() ----------------hashAgg[GLOBAL] @@ -30,8 +30,8 @@ PhysicalResultSink ----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 --------------------------PhysicalProject ----------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) -------------------------------PhysicalOlapScan[date_dim] RFV2: RF6 -----------------PhysicalOlapScan[customer] +------------------------------PhysicalOlapScan[date_dim] +----------------PhysicalOlapScan[customer] RFV2: RF6 ------------PhysicalDistribute[DistributionSpecHash] --------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() ----------------hashAgg[GLOBAL] @@ -43,6 +43,6 @@ PhysicalResultSink ----------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 --------------------------PhysicalProject ----------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) -------------------------------PhysicalOlapScan[date_dim] RFV2: RF7 -----------------PhysicalOlapScan[customer] +------------------------------PhysicalOlapScan[date_dim] +----------------PhysicalOlapScan[customer] RFV2: RF7 diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query38.out b/regression-test/data/shape_check/tpcds_sf100/shape/query38.out index 33b048b25b1569..055f3f812dd0ba 100644 --- a/regression-test/data/shape_check/tpcds_sf100/shape/query38.out +++ b/regression-test/data/shape_check/tpcds_sf100/shape/query38.out @@ -7,7 +7,7 @@ PhysicalResultSink --------PhysicalDistribute[DistributionSpecGather] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------PhysicalIntersect RFV2: RF6[d_date->d_date] RF7[d_date->ss_customer_sk] RF8[d_date->c_customer_sk] +--------------PhysicalIntersect RFV2: RF6[c_last_name->c_last_name] RF7[c_last_name->c_last_name] ----------------PhysicalDistribute[DistributionSpecHash] ------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ws_bill_customer_sk] --------------------hashAgg[GLOBAL] @@ -32,8 +32,8 @@ PhysicalResultSink --------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 ------------------------------PhysicalProject --------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) -----------------------------------PhysicalOlapScan[date_dim] RFV2: RF6 ---------------------PhysicalOlapScan[customer] +----------------------------------PhysicalOlapScan[date_dim] +--------------------PhysicalOlapScan[customer] RFV2: RF6 ----------------PhysicalDistribute[DistributionSpecHash] ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ss_customer_sk] --------------------hashAgg[GLOBAL] @@ -42,9 +42,9 @@ PhysicalResultSink --------------------------PhysicalProject ----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk] ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5 RFV2: RF7 +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5 ------------------------------PhysicalProject --------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183)) ----------------------------------PhysicalOlapScan[date_dim] ---------------------PhysicalOlapScan[customer] RFV2: RF8 +--------------------PhysicalOlapScan[customer] RFV2: RF7 diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query87.out b/regression-test/data/shape_check/tpcds_sf100/shape/query87.out index 37a54a363e2a2d..61eb6848d23bfc 100644 --- a/regression-test/data/shape_check/tpcds_sf100/shape/query87.out +++ b/regression-test/data/shape_check/tpcds_sf100/shape/query87.out @@ -5,7 +5,7 @@ PhysicalResultSink ----PhysicalDistribute[DistributionSpecGather] ------hashAgg[LOCAL] --------PhysicalProject -----------PhysicalExcept RFV2: RF6[ss_customer_sk->d_date] RF7[ss_customer_sk->d_date] +----------PhysicalExcept RFV2: RF6[c_last_name->c_last_name] RF7[c_last_name->c_last_name] ------------PhysicalDistribute[DistributionSpecHash] --------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ss_customer_sk] ----------------hashAgg[GLOBAL] @@ -30,8 +30,8 @@ PhysicalResultSink ----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 --------------------------PhysicalProject ----------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) -------------------------------PhysicalOlapScan[date_dim] RFV2: RF6 -----------------PhysicalOlapScan[customer] +------------------------------PhysicalOlapScan[date_dim] +----------------PhysicalOlapScan[customer] RFV2: RF6 ------------PhysicalDistribute[DistributionSpecHash] --------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ws_bill_customer_sk] ----------------hashAgg[GLOBAL] @@ -43,6 +43,6 @@ PhysicalResultSink ----------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 --------------------------PhysicalProject ----------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184)) -------------------------------PhysicalOlapScan[date_dim] RFV2: RF7 -----------------PhysicalOlapScan[customer] +------------------------------PhysicalOlapScan[date_dim] +----------------PhysicalOlapScan[customer] RFV2: RF7