From 7b2b92394c3d58e0e16c216cae2a78660d355e92 Mon Sep 17 00:00:00 2001
From: Wang <mingmwang@ebay.com>
Date: Mon, 17 Oct 2022 11:16:51 +0800
Subject: [PATCH 01/12] [Phase1] Partition and Sort Enforcement rule

---
 .../examples/custom_datasource.rs             |   6 +
 datafusion/core/src/execution/context.rs      |   6 +-
 .../physical_optimizer/coalesce_batches.rs    |  35 +-
 .../src/physical_optimizer/enforcement.rs     | 858 ++++++++++++++++++
 .../core/src/physical_optimizer/merge_exec.rs |  36 +-
 datafusion/core/src/physical_optimizer/mod.rs |   1 +
 .../src/physical_optimizer/repartition.rs     |   5 +-
 .../core/src/physical_optimizer/utils.rs      |  71 ++
 .../core/src/physical_plan/aggregates/mod.rs  |  76 +-
 datafusion/core/src/physical_plan/analyze.rs  |   9 +-
 .../src/physical_plan/coalesce_batches.rs     |   8 +-
 .../src/physical_plan/coalesce_partitions.rs  |   5 +-
 .../core/src/physical_plan/cross_join.rs      |   8 +-
 datafusion/core/src/physical_plan/empty.rs    |  11 +-
 datafusion/core/src/physical_plan/explain.rs  |   5 +-
 .../src/physical_plan/file_format/avro.rs     |   5 +-
 .../core/src/physical_plan/file_format/csv.rs |   9 +-
 .../src/physical_plan/file_format/json.rs     |   5 +-
 .../src/physical_plan/file_format/parquet.rs  |   6 +-
 datafusion/core/src/physical_plan/filter.rs   |  51 +-
 .../core/src/physical_plan/hash_join.rs       |  53 +-
 datafusion/core/src/physical_plan/limit.rs    |  26 +-
 datafusion/core/src/physical_plan/memory.rs   |   5 +-
 datafusion/core/src/physical_plan/mod.rs      | 206 ++++-
 datafusion/core/src/physical_plan/planner.rs  |  70 +-
 .../core/src/physical_plan/projection.rs      |  58 +-
 .../core/src/physical_plan/repartition.rs     |   9 +-
 .../core/src/physical_plan/sort_merge_join.rs |  86 +-
 .../core/src/physical_plan/sorts/sort.rs      |  20 +-
 .../sorts/sort_preserving_merge.rs            |  13 +-
 datafusion/core/src/physical_plan/union.rs    | 113 ++-
 datafusion/core/src/physical_plan/values.rs   |  13 +-
 .../core/src/physical_plan/windows/mod.rs     |   4 +
 .../physical_plan/windows/window_agg_exec.rs  |  36 +-
 .../core/src/scheduler/pipeline/execution.rs  |  12 +-
 datafusion/core/src/test/exec.rs              |  21 +
 datafusion/core/tests/custom_sources.rs       |   5 +
 .../core/tests/provider_filter_pushdown.rs    |   6 +
 datafusion/core/tests/statistics.rs           |   5 +
 datafusion/core/tests/user_defined_plan.rs    |   9 +-
 datafusion/physical-expr/Cargo.toml           |   1 +
 .../physical-expr/src/expressions/binary.rs   |  25 +
 .../physical-expr/src/expressions/case.rs     |  64 ++
 .../physical-expr/src/expressions/cast.rs     |  31 +
 .../physical-expr/src/expressions/column.rs   |  22 +
 .../physical-expr/src/expressions/datetime.rs |  34 +-
 .../src/expressions/get_indexed_field.rs      |  24 +
 .../physical-expr/src/expressions/in_list.rs  |  52 +-
 .../src/expressions/is_not_null.rs            |  21 +
 .../physical-expr/src/expressions/is_null.rs  |  21 +
 .../physical-expr/src/expressions/literal.rs  |  23 +-
 .../physical-expr/src/expressions/mod.rs      |   1 +
 .../physical-expr/src/expressions/negative.rs |  21 +
 .../physical-expr/src/expressions/no_op.rs    |  87 ++
 .../physical-expr/src/expressions/not.rs      |  21 +
 .../physical-expr/src/expressions/try_cast.rs |  24 +
 datafusion/physical-expr/src/lib.rs           |  11 +
 datafusion/physical-expr/src/physical_expr.rs | 249 ++++-
 .../physical-expr/src/scalar_function.rs      |  22 +
 datafusion/physical-expr/src/sort_expr.rs     |   6 +
 datafusion/physical-expr/src/utils.rs         | 117 +++
 61 files changed, 2604 insertions(+), 259 deletions(-)
 create mode 100644 datafusion/core/src/physical_optimizer/enforcement.rs
 create mode 100644 datafusion/physical-expr/src/expressions/no_op.rs
 create mode 100644 datafusion/physical-expr/src/utils.rs

diff --git a/datafusion-examples/examples/custom_datasource.rs b/datafusion-examples/examples/custom_datasource.rs
index fd8f945f27701..d986500539d8e 100644
--- a/datafusion-examples/examples/custom_datasource.rs
+++ b/datafusion-examples/examples/custom_datasource.rs
@@ -222,6 +222,12 @@ impl ExecutionPlan for CustomExec {
         None
     }
 
+    fn equivalence_properties(
+        &self,
+    ) -> Vec<Vec<datafusion::physical_expr::expressions::Column>> {
+        vec![]
+    }
+
     fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
         vec![]
     }
diff --git a/datafusion/core/src/execution/context.rs b/datafusion/core/src/execution/context.rs
index f65c849e482e8..ac60fa5ed7ef6 100644
--- a/datafusion/core/src/execution/context.rs
+++ b/datafusion/core/src/execution/context.rs
@@ -85,7 +85,6 @@ use crate::optimizer::subquery_filter_to_join::SubqueryFilterToJoin;
 use datafusion_sql::{ResolvedTableReference, TableReference};
 
 use crate::physical_optimizer::coalesce_batches::CoalesceBatches;
-use crate::physical_optimizer::merge_exec::AddCoalescePartitionsExec;
 use crate::physical_optimizer::repartition::Repartition;
 
 use crate::config::{
@@ -95,6 +94,7 @@ use crate::config::{
 use crate::datasource::datasource::TableProviderFactory;
 use crate::execution::runtime_env::RuntimeEnv;
 use crate::logical_plan::plan::Explain;
+use crate::physical_optimizer::enforcement::BasicEnforcement;
 use crate::physical_plan::file_format::{plan_to_csv, plan_to_json, plan_to_parquet};
 use crate::physical_plan::planner::DefaultPhysicalPlanner;
 use crate::physical_plan::udaf::AggregateUDF;
@@ -1503,6 +1503,7 @@ impl SessionState {
             Arc::new(AggregateStatistics::new()),
             Arc::new(HashBuildProbeOrder::new()),
         ];
+        physical_optimizers.push(Arc::new(BasicEnforcement::new()));
         if config
             .config_options
             .read()
@@ -1520,7 +1521,8 @@ impl SessionState {
             )));
         }
         physical_optimizers.push(Arc::new(Repartition::new()));
-        physical_optimizers.push(Arc::new(AddCoalescePartitionsExec::new()));
+        physical_optimizers.push(Arc::new(BasicEnforcement::new()));
+        // physical_optimizers.push(Arc::new(AddCoalescePartitionsExec::new()));
 
         SessionState {
             session_id,
diff --git a/datafusion/core/src/physical_optimizer/coalesce_batches.rs b/datafusion/core/src/physical_optimizer/coalesce_batches.rs
index 51d56d28d9745..ac7ca388f90cc 100644
--- a/datafusion/core/src/physical_optimizer/coalesce_batches.rs
+++ b/datafusion/core/src/physical_optimizer/coalesce_batches.rs
@@ -24,9 +24,10 @@ use crate::{
     physical_plan::{
         coalesce_batches::CoalesceBatchesExec, filter::FilterExec,
         hash_join::HashJoinExec, repartition::RepartitionExec,
-        with_new_children_if_necessary,
     },
 };
+
+use crate::physical_optimizer::utils::transform_up;
 use std::sync::Arc;
 
 /// Optimizer rule that introduces CoalesceBatchesExec to avoid overhead with small batches that
@@ -43,40 +44,32 @@ impl CoalesceBatches {
         Self { target_batch_size }
     }
 }
+
 impl PhysicalOptimizerRule for CoalesceBatches {
     fn optimize(
         &self,
         plan: Arc<dyn crate::physical_plan::ExecutionPlan>,
-        config: &crate::execution::context::SessionConfig,
+        _config: &crate::execution::context::SessionConfig,
     ) -> Result<Arc<dyn crate::physical_plan::ExecutionPlan>> {
-        if plan.children().is_empty() {
-            // leaf node, children cannot be replaced
-            Ok(plan.clone())
-        } else {
-            // recurse down first
-            let children = plan
-                .children()
-                .iter()
-                .map(|child| self.optimize(child.clone(), config))
-                .collect::<Result<Vec<_>>>()?;
-            let plan = with_new_children_if_necessary(plan, children)?;
+        let target_batch_size = self.target_batch_size;
+        transform_up(plan, &|plan| {
+            let plan_any = plan.as_any();
             // The goal here is to detect operators that could produce small batches and only
             // wrap those ones with a CoalesceBatchesExec operator. An alternate approach here
             // would be to build the coalescing logic directly into the operators
             // See https://github.com/apache/arrow-datafusion/issues/139
-            let plan_any = plan.as_any();
             let wrap_in_coalesce = plan_any.downcast_ref::<FilterExec>().is_some()
                 || plan_any.downcast_ref::<HashJoinExec>().is_some()
                 || plan_any.downcast_ref::<RepartitionExec>().is_some();
-            Ok(if wrap_in_coalesce {
-                Arc::new(CoalesceBatchesExec::new(
+            if wrap_in_coalesce {
+                Some(Arc::new(CoalesceBatchesExec::new(
                     plan.clone(),
-                    self.target_batch_size,
-                ))
+                    target_batch_size,
+                )))
             } else {
-                plan.clone()
-            })
-        }
+                None
+            }
+        })
     }
 
     fn name(&self) -> &str {
diff --git a/datafusion/core/src/physical_optimizer/enforcement.rs b/datafusion/core/src/physical_optimizer/enforcement.rs
new file mode 100644
index 0000000000000..b559d0a00b818
--- /dev/null
+++ b/datafusion/core/src/physical_optimizer/enforcement.rs
@@ -0,0 +1,858 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Enforcement optimizer rules are used to make sure the plan's Distribution and Ordering
+//! requirements are met by inserting the necessary [`RepartitionExec`] and [`SortExec`].
+//!
+use crate::error::Result;
+use crate::physical_optimizer::utils::transform_up;
+use crate::physical_optimizer::PhysicalOptimizerRule;
+use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec;
+use crate::physical_plan::repartition::RepartitionExec;
+use crate::physical_plan::sorts::sort::SortExec;
+use crate::physical_plan::{with_new_children_if_necessary, Distribution, ExecutionPlan};
+use crate::prelude::SessionConfig;
+use datafusion_physical_expr::expressions::Column;
+use datafusion_physical_expr::{
+    normalize_sort_expr_with_equivalence_properties, PhysicalSortExpr,
+};
+use std::sync::Arc;
+
+/// The BasicEnforcement rule ensures the Distribution and Ordering requirements are met
+/// in the strictest way. It might add additional [`RepartitionExec`] to the plan tree
+/// and give a non-optimal plan, but it can avoid the possible data skew in joins
+///
+/// For example for a HashJoin with keys(a, b, c), the required Distribution(a, b, c) can be satisfied by
+/// several alternative partitioning ways: [(a, b, c), (a, b), (a, c), (b, c), (a), (b), (c), ( )].
+///
+/// This rule only chooses the exact match and satisfies the Distribution(a, b, c) with a HashPartition(a, b, c).
+#[derive(Default)]
+pub struct BasicEnforcement {}
+
+impl BasicEnforcement {
+    #[allow(missing_docs)]
+    pub fn new() -> Self {
+        Self {}
+    }
+}
+
+impl PhysicalOptimizerRule for BasicEnforcement {
+    fn optimize(
+        &self,
+        plan: Arc<dyn ExecutionPlan>,
+        config: &SessionConfig,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        // Distribution and Ordering enforcement need to be applied bottom-up.
+        let target_partitions = config.target_partitions;
+        transform_up(plan, &{
+            |plan| Some(ensure_distribution_and_ordering(plan, target_partitions))
+        })
+    }
+
+    fn name(&self) -> &str {
+        "BasicEnforcement"
+    }
+}
+
+fn ensure_distribution_and_ordering(
+    plan: Arc<dyn crate::physical_plan::ExecutionPlan>,
+    target_partitions: usize,
+) -> Arc<dyn crate::physical_plan::ExecutionPlan> {
+    if plan.children().is_empty() {
+        return plan;
+    }
+    let required_input_distributions = plan.required_input_distribution();
+    let required_input_orderings = plan.required_input_ordering();
+    let children: Vec<Arc<dyn ExecutionPlan>> = plan.children();
+    assert_eq!(children.len(), required_input_distributions.len());
+    assert_eq!(children.len(), required_input_orderings.len());
+
+    // Add RepartitionExec (or CoalescePartitionsExec) to guarantee the required input distribution
+    let children = children
+        .into_iter()
+        .zip(required_input_distributions.into_iter())
+        .map(|(child, required)| {
+            if child
+                .output_partitioning()
+                .satisfy(required.clone(), || child.equivalence_properties())
+            {
+                child
+            } else {
+                let new_child: Arc<dyn ExecutionPlan> = match required {
+                    Distribution::SinglePartition
+                        if child.output_partitioning().partition_count() > 1 =>
+                    {
+                        Arc::new(CoalescePartitionsExec::new(child.clone()))
+                    }
+                    _ => {
+                        let partition = required.create_partitioning(target_partitions);
+                        Arc::new(RepartitionExec::try_new(child, partition).unwrap())
+                    }
+                };
+                new_child
+            }
+        });
+
+    // Add SortExec to guarantee output ordering
+    let new_children: Vec<Arc<dyn ExecutionPlan>> = children
+        .zip(required_input_orderings.into_iter())
+        .map(|(child, required)| {
+            if ordering_satisfy(child.output_ordering(), required, || {
+                child.equivalence_properties()
+            }) {
+                child
+            } else {
+                let sort_expr = required.unwrap().to_vec();
+                if child.output_partitioning().partition_count() > 1 {
+                    Arc::new(SortExec::new_with_partitioning(
+                        sort_expr, child, true, None,
+                    ))
+                } else {
+                    Arc::new(SortExec::try_new(sort_expr, child, None).unwrap())
+                }
+            }
+        })
+        .collect::<Vec<_>>();
+
+    with_new_children_if_necessary(plan, new_children).unwrap()
+}
+
+/// DynamicEnforcement rule
+///
+///
+#[derive(Default)]
+pub struct DynamicEnforcement {}
+
+// TODO
+impl DynamicEnforcement {
+    #[allow(missing_docs)]
+    pub fn new() -> Self {
+        Self {}
+    }
+}
+
+/// Checks whether the required ordering is satisfied by the provided PhysicalSortExprs.
+fn ordering_satisfy<F: FnOnce() -> Vec<Vec<Column>>>(
+    provided: Option<&[PhysicalSortExpr]>,
+    required: Option<&[PhysicalSortExpr]>,
+    equal_properties: F,
+) -> bool {
+    match (provided, required) {
+        (_, None) => true,
+        (None, Some(_)) => false,
+        (Some(provided), Some(required)) => {
+            if required.len() > provided.len() {
+                false
+            } else {
+                let fast_match = required
+                    .iter()
+                    .zip(provided.iter())
+                    .all(|(order1, order2)| order1.eq(order2));
+
+                if !fast_match {
+                    let eq_properties = equal_properties();
+                    if !eq_properties.is_empty() {
+                        let normalized_required_exprs = required
+                            .iter()
+                            .map(|e| {
+                                normalize_sort_expr_with_equivalence_properties(
+                                    e.clone(),
+                                    &eq_properties,
+                                )
+                            })
+                            .collect::<Vec<_>>();
+                        let normalized_provided_exprs = provided
+                            .iter()
+                            .map(|e| {
+                                normalize_sort_expr_with_equivalence_properties(
+                                    e.clone(),
+                                    &eq_properties,
+                                )
+                            })
+                            .collect::<Vec<_>>();
+                        normalized_required_exprs
+                            .iter()
+                            .zip(normalized_provided_exprs.iter())
+                            .all(|(order1, order2)| order1.eq(order2))
+                    } else {
+                        fast_match
+                    }
+                } else {
+                    fast_match
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use arrow::compute::SortOptions;
+    use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
+    use datafusion_expr::logical_plan::JoinType;
+    use datafusion_physical_expr::expressions::Column;
+    use datafusion_physical_expr::{expressions, PhysicalExpr};
+
+    use super::*;
+    use crate::datasource::listing::PartitionedFile;
+    use crate::datasource::object_store::ObjectStoreUrl;
+    use crate::physical_plan::aggregates::{
+        AggregateExec, AggregateMode, PhysicalGroupBy,
+    };
+    use crate::physical_plan::expressions::col;
+    use crate::physical_plan::file_format::{FileScanConfig, ParquetExec};
+    use crate::physical_plan::hash_join::{HashJoinExec, PartitionMode};
+    use crate::physical_plan::join_utils::JoinOn;
+    use crate::physical_plan::projection::ProjectionExec;
+    use crate::physical_plan::sort_merge_join::SortMergeJoinExec;
+    use crate::physical_plan::{displayable, Statistics};
+
+    fn schema() -> SchemaRef {
+        Arc::new(Schema::new(vec![
+            Field::new("a", DataType::Int64, true),
+            Field::new("b", DataType::Int64, true),
+            Field::new("c", DataType::Int64, true),
+            Field::new("d", DataType::Int32, true),
+            Field::new("e", DataType::Boolean, true),
+        ]))
+    }
+
+    fn parquet_exec() -> Arc<ParquetExec> {
+        Arc::new(ParquetExec::new(
+            FileScanConfig {
+                object_store_url: ObjectStoreUrl::parse("test:///").unwrap(),
+                file_schema: schema(),
+                file_groups: vec![vec![PartitionedFile::new("x".to_string(), 100)]],
+                statistics: Statistics::default(),
+                projection: None,
+                limit: None,
+                table_partition_cols: vec![],
+            },
+            None,
+            None,
+        ))
+    }
+
+    fn projection_exec_with_alias(
+        input: Arc<dyn ExecutionPlan>,
+        alias_pairs: Vec<(String, String)>,
+    ) -> Arc<dyn ExecutionPlan> {
+        let mut exprs = vec![];
+        for (column, alias) in alias_pairs.iter() {
+            exprs.push((col(column, &input.schema()).unwrap(), alias.to_string()));
+        }
+        Arc::new(ProjectionExec::try_new(exprs, input).unwrap())
+    }
+
+    fn aggregate_exec_with_alias(
+        input: Arc<dyn ExecutionPlan>,
+        alias_pairs: Vec<(String, String)>,
+    ) -> Arc<dyn ExecutionPlan> {
+        let schema = schema();
+        let mut group_by_expr: Vec<(Arc<dyn PhysicalExpr>, String)> = vec![];
+        for (column, alias) in alias_pairs.iter() {
+            group_by_expr
+                .push((col(column, &input.schema()).unwrap(), alias.to_string()));
+        }
+        let group_by = PhysicalGroupBy::new_single(group_by_expr.clone());
+
+        let final_group_by_expr = group_by_expr
+            .iter()
+            .enumerate()
+            .map(|(index, (_col, name))| {
+                (
+                    Arc::new(expressions::Column::new(name, index))
+                        as Arc<dyn PhysicalExpr>,
+                    name.clone(),
+                )
+            })
+            .collect::<Vec<_>>();
+        let final_grouping = PhysicalGroupBy::new_single(final_group_by_expr);
+
+        Arc::new(
+            AggregateExec::try_new(
+                AggregateMode::FinalPartitioned,
+                final_grouping,
+                vec![],
+                Arc::new(
+                    AggregateExec::try_new(
+                        AggregateMode::Partial,
+                        group_by,
+                        vec![],
+                        input,
+                        schema.clone(),
+                    )
+                    .unwrap(),
+                ),
+                schema,
+            )
+            .unwrap(),
+        )
+    }
+
+    fn hash_join_exec(
+        left: Arc<dyn ExecutionPlan>,
+        right: Arc<dyn ExecutionPlan>,
+        join_on: &JoinOn,
+        join_type: &JoinType,
+    ) -> Arc<dyn ExecutionPlan> {
+        Arc::new(
+            HashJoinExec::try_new(
+                left,
+                right,
+                join_on.clone(),
+                None,
+                join_type,
+                PartitionMode::Partitioned,
+                &false,
+            )
+            .unwrap(),
+        )
+    }
+
+    fn sort_merge_join_exec(
+        left: Arc<dyn ExecutionPlan>,
+        right: Arc<dyn ExecutionPlan>,
+        join_on: &JoinOn,
+        join_type: &JoinType,
+    ) -> Arc<dyn ExecutionPlan> {
+        Arc::new(
+            SortMergeJoinExec::try_new(
+                left,
+                right,
+                join_on.clone(),
+                *join_type,
+                vec![SortOptions::default(); join_on.len()],
+                false,
+            )
+            .unwrap(),
+        )
+    }
+
+    fn trim_plan_display(plan: &str) -> Vec<&str> {
+        plan.split('\n')
+            .map(|s| s.trim())
+            .filter(|s| !s.is_empty())
+            .collect()
+    }
+
+    /// Runs the BasicEnforcement optimizer and asserts the plan against the expected lines
+    macro_rules! assert_optimized {
+        ($EXPECTED_LINES: expr, $PLAN: expr) => {
+            let expected_lines: Vec<&str> = $EXPECTED_LINES.iter().map(|s| *s).collect();
+
+            // run optimizer
+            let optimizer = BasicEnforcement {};
+            let optimized = optimizer
+                .optimize($PLAN, &SessionConfig::new().with_target_partitions(10))?;
+
+            // Now format correctly
+            let plan = displayable(optimized.as_ref()).indent().to_string();
+            let actual_lines = trim_plan_display(&plan);
+
+            assert_eq!(
+                &expected_lines, &actual_lines,
+                "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
+                expected_lines, actual_lines
+            );
+        };
+    }
+
+    #[test]
+    fn muti_hash_joins() -> Result<()> {
+        let left = parquet_exec();
+        let right = parquet_exec();
+        let join_types = vec![
+            JoinType::Inner,
+            JoinType::Left,
+            JoinType::Right,
+            JoinType::Full,
+            JoinType::Semi,
+            JoinType::Anti,
+        ];
+
+        // Join on (a == b)
+        let join_on = vec![(
+            Column::new_with_schema("a", &schema()).unwrap(),
+            Column::new_with_schema("b", &schema()).unwrap(),
+        )];
+
+        // Join on (a == c)
+        let top_join_on = vec![(
+            Column::new_with_schema("a", &schema()).unwrap(),
+            Column::new_with_schema("c", &schema()).unwrap(),
+        )];
+
+        for join_type in join_types {
+            let join = hash_join_exec(left.clone(), right.clone(), &join_on, &join_type);
+            let top_join =
+                hash_join_exec(join.clone(), right.clone(), &top_join_on, &join_type);
+
+            let top_join_plan =
+                format!("HashJoinExec: mode=Partitioned, join_type={}, on=[(Column {{ name: \"a\", index: 0 }}, Column {{ name: \"c\", index: 2 }})]", join_type);
+            let join_plan =
+                format!("HashJoinExec: mode=Partitioned, join_type={}, on=[(Column {{ name: \"a\", index: 0 }}, Column {{ name: \"b\", index: 1 }})]", join_type);
+
+            let expected = match join_type {
+                // Should include 3 RepartitionExecs
+                JoinType::Inner | JoinType::Left => vec![
+                    top_join_plan.as_str(),
+                    join_plan.as_str(),
+                    "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                ],
+                // Should include 4 RepartitionExecs
+                _ => vec![
+                    top_join_plan.as_str(),
+                    "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
+                    join_plan.as_str(),
+                    "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                ],
+            };
+            assert_optimized!(expected, top_join);
+
+            // This time we use (b == c) for top join
+            // Join on (b == c)
+            let top_join_on = vec![(
+                Column::new_with_schema("b", &schema()).unwrap(),
+                Column::new_with_schema("c", &schema()).unwrap(),
+            )];
+
+            let top_join = hash_join_exec(join, right.clone(), &top_join_on, &join_type);
+            let top_join_plan =
+                format!("HashJoinExec: mode=Partitioned, join_type={}, on=[(Column {{ name: \"b\", index: 1 }}, Column {{ name: \"c\", index: 2 }})]", join_type);
+
+            let expected = match join_type {
+                // Should include 3 RepartitionExecs
+                JoinType::Inner | JoinType::Right => vec![
+                    top_join_plan.as_str(),
+                    join_plan.as_str(),
+                    "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                ],
+                // Should include 4 RepartitionExecs
+                _ => vec![
+                    top_join_plan.as_str(),
+                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
+                    join_plan.as_str(),
+                    "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                ],
+            };
+            assert_optimized!(expected, top_join);
+        }
+
+        Ok(())
+    }
+
+    #[test]
+    fn muti_joins_after_alias() -> Result<()> {
+        let left = parquet_exec();
+        let right = parquet_exec();
+
+        // Join on (a == b)
+        let join_on = vec![(
+            Column::new_with_schema("a", &schema()).unwrap(),
+            Column::new_with_schema("b", &schema()).unwrap(),
+        )];
+        let join = hash_join_exec(left, right.clone(), &join_on, &JoinType::Inner);
+
+        // Projection(a as a1, a as a2)
+        let alias_pairs: Vec<(String, String)> = vec![
+            ("a".to_string(), "a1".to_string()),
+            ("a".to_string(), "a2".to_string()),
+        ];
+        let projection = projection_exec_with_alias(join, alias_pairs);
+
+        // Join on (a1 == c)
+        let top_join_on = vec![(
+            Column::new_with_schema("a1", &projection.schema()).unwrap(),
+            Column::new_with_schema("c", &schema()).unwrap(),
+        )];
+
+        let top_join = hash_join_exec(
+            projection.clone(),
+            right.clone(),
+            &top_join_on,
+            &JoinType::Inner,
+        );
+
+        // The output partitioning needs to respect the alias, so no additional RepartitionExec should be introduced
+        let expected = &[
+            "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: \"a1\", index: 0 }, Column { name: \"c\", index: 2 })]",
+            "ProjectionExec: expr=[a@0 as a1, a@0 as a2]",
+            "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: \"a\", index: 0 }, Column { name: \"b\", index: 1 })]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
+            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
+            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
+            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+        ];
+        assert_optimized!(expected, top_join);
+
+        // Join on (a2 == c)
+        let top_join_on = vec![(
+            Column::new_with_schema("a2", &projection.schema()).unwrap(),
+            Column::new_with_schema("c", &schema()).unwrap(),
+        )];
+
+        let top_join = hash_join_exec(projection, right, &top_join_on, &JoinType::Inner);
+
+        // The output partitioning needs to respect the alias, so no additional RepartitionExec should be introduced
+        let expected = &[
+            "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: \"a2\", index: 1 }, Column { name: \"c\", index: 2 })]",
+            "ProjectionExec: expr=[a@0 as a1, a@0 as a2]",
+            "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: \"a\", index: 0 }, Column { name: \"b\", index: 1 })]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
+            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
+            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
+            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+        ];
+
+        assert_optimized!(expected, top_join);
+        Ok(())
+    }
+
+    #[test]
+    fn muti_joins_after_multi_alias() -> Result<()> {
+        let left = parquet_exec();
+        let right = parquet_exec();
+
+        // Join on (a == b)
+        let join_on = vec![(
+            Column::new_with_schema("a", &schema()).unwrap(),
+            Column::new_with_schema("b", &schema()).unwrap(),
+        )];
+
+        let join = hash_join_exec(left, right.clone(), &join_on, &JoinType::Inner);
+
+        // Projection(c as c1)
+        let alias_pairs: Vec<(String, String)> =
+            vec![("c".to_string(), "c1".to_string())];
+        let projection = projection_exec_with_alias(join, alias_pairs);
+
+        // Projection(c1 as a)
+        let alias_pairs: Vec<(String, String)> =
+            vec![("c1".to_string(), "a".to_string())];
+        let projection2 = projection_exec_with_alias(projection, alias_pairs);
+
+        // Join on (a == c)
+        let top_join_on = vec![(
+            Column::new_with_schema("a", &projection2.schema()).unwrap(),
+            Column::new_with_schema("c", &schema()).unwrap(),
+        )];
+
+        let top_join = hash_join_exec(projection2, right, &top_join_on, &JoinType::Inner);
+
+        // The column 'a' has a different meaning after the two projections (it is now an alias of 'c'), so the
+        // original output partitioning cannot satisfy the join requirement and an additional RepartitionExec is needed
+        let expected = &[
+            "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: \"a\", index: 0 }, Column { name: \"c\", index: 2 })]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
+            "ProjectionExec: expr=[c1@0 as a]",
+            "ProjectionExec: expr=[c@2 as c1]",
+            "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: \"a\", index: 0 }, Column { name: \"b\", index: 1 })]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
+            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
+            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
+            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+        ];
+
+        assert_optimized!(expected, top_join);
+        Ok(())
+    }
+
+    #[test]
+    fn join_after_agg_alias() -> Result<()> {
+        // group by (a as a1)
+        let left = aggregate_exec_with_alias(
+            parquet_exec(),
+            vec![("a".to_string(), "a1".to_string())],
+        );
+        // group by (a as a2)
+        let right = aggregate_exec_with_alias(
+            parquet_exec(),
+            vec![("a".to_string(), "a2".to_string())],
+        );
+
+        // Join on (a1 == a2)
+        let join_on = vec![(
+            Column::new_with_schema("a1", &left.schema()).unwrap(),
+            Column::new_with_schema("a2", &right.schema()).unwrap(),
+        )];
+        let join = hash_join_exec(left, right.clone(), &join_on, &JoinType::Inner);
+
+        // Only two RepartitionExecs added
+        let expected = &[
+            "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: \"a1\", index: 0 }, Column { name: \"a2\", index: 0 })]",
+            "AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1], aggr=[]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"a1\", index: 0 }], 10)",
+            "AggregateExec: mode=Partial, gby=[a@0 as a1], aggr=[]",
+            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+            "AggregateExec: mode=FinalPartitioned, gby=[a2@0 as a2], aggr=[]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"a2\", index: 0 }], 10)",
+            "AggregateExec: mode=Partial, gby=[a@0 as a2], aggr=[]",
+            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+        ];
+        assert_optimized!(expected, join);
+        Ok(())
+    }
+
+    #[test]
+    fn hash_join_key_ordering() -> Result<()> {
+        // group by (a as a1, b as b1)
+        let left = aggregate_exec_with_alias(
+            parquet_exec(),
+            vec![
+                ("a".to_string(), "a1".to_string()),
+                ("b".to_string(), "b1".to_string()),
+            ],
+        );
+        // group by (b, a)
+        let right = aggregate_exec_with_alias(
+            parquet_exec(),
+            vec![
+                ("b".to_string(), "b".to_string()),
+                ("a".to_string(), "a".to_string()),
+            ],
+        );
+
+        // Join on (b1 == b && a1 == a)
+        let join_on = vec![
+            (
+                Column::new_with_schema("b1", &left.schema()).unwrap(),
+                Column::new_with_schema("b", &right.schema()).unwrap(),
+            ),
+            (
+                Column::new_with_schema("a1", &left.schema()).unwrap(),
+                Column::new_with_schema("a", &right.schema()).unwrap(),
+            ),
+        ];
+        let join = hash_join_exec(left, right.clone(), &join_on, &JoinType::Inner);
+
+        // Only two RepartitionExecs added
+        let expected = &[
+            "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: \"b1\", index: 1 }, Column { name: \"b\", index: 0 }), (Column { name: \"a1\", index: 0 }, Column { name: \"a\", index: 1 })]",
+            "AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1, b1@1 as b1], aggr=[]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"a1\", index: 0 }, Column { name: \"b1\", index: 1 }], 10)",
+            "AggregateExec: mode=Partial, gby=[a@0 as a1, b@1 as b1], aggr=[]",
+            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+            "AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 0 }, Column { name: \"a\", index: 1 }], 10)",
+            "AggregateExec: mode=Partial, gby=[b@1 as b, a@0 as a], aggr=[]",
+            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+        ];
+        assert_optimized!(expected, join);
+        Ok(())
+    }
+
+    #[test]
+    fn muti_smj_joins() -> Result<()> {
+        let left = parquet_exec();
+        let right = parquet_exec();
+        let join_types = vec![
+            JoinType::Inner,
+            JoinType::Left,
+            JoinType::Right,
+            JoinType::Full,
+            JoinType::Semi,
+            JoinType::Anti,
+        ];
+
+        // Join on (a == b)
+        let join_on = vec![(
+            Column::new_with_schema("a", &schema()).unwrap(),
+            Column::new_with_schema("b", &schema()).unwrap(),
+        )];
+
+        // Join on (a == c)
+        let top_join_on = vec![(
+            Column::new_with_schema("a", &schema()).unwrap(),
+            Column::new_with_schema("c", &schema()).unwrap(),
+        )];
+
+        for join_type in join_types {
+            let join =
+                sort_merge_join_exec(left.clone(), right.clone(), &join_on, &join_type);
+            let top_join = sort_merge_join_exec(
+                join.clone(),
+                right.clone(),
+                &top_join_on,
+                &join_type,
+            );
+
+            let top_join_plan =
+                format!("SortMergeJoin: join_type={}, on=[(Column {{ name: \"a\", index: 0 }}, Column {{ name: \"c\", index: 2 }})]", join_type);
+            let join_plan =
+                format!("SortMergeJoin: join_type={}, on=[(Column {{ name: \"a\", index: 0 }}, Column {{ name: \"b\", index: 1 }})]", join_type);
+
+            let expected = match join_type {
+                // Should include 3 RepartitionExecs and 3 SortExecs
+                JoinType::Inner | JoinType::Left => vec![
+                    top_join_plan.as_str(),
+                    join_plan.as_str(),
+                    "SortExec: [a@0 ASC]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                    "SortExec: [b@1 ASC]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                    "SortExec: [c@2 ASC]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                ],
+                // Should include 4 RepartitionExecs and 4 SortExecs
+                _ => vec![
+                    top_join_plan.as_str(),
+                    "SortExec: [a@0 ASC]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
+                    join_plan.as_str(),
+                    "SortExec: [a@0 ASC]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                    "SortExec: [b@1 ASC]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                    "SortExec: [c@2 ASC]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                ],
+            };
+            assert_optimized!(expected, top_join);
+
+            // This time we use (b == c) for top join
+            // Join on (b == c)
+            let top_join_on = vec![(
+                Column::new_with_schema("b", &schema()).unwrap(),
+                Column::new_with_schema("c", &schema()).unwrap(),
+            )];
+
+            let top_join =
+                sort_merge_join_exec(join, right.clone(), &top_join_on, &join_type);
+            let top_join_plan =
+                format!("SortMergeJoin: join_type={}, on=[(Column {{ name: \"b\", index: 1 }}, Column {{ name: \"c\", index: 2 }})]", join_type);
+
+            let expected = match join_type {
+                // Should include 3 RepartitionExecs and 3 SortExecs
+                JoinType::Inner | JoinType::Right => vec![
+                    top_join_plan.as_str(),
+                    join_plan.as_str(),
+                    "SortExec: [a@0 ASC]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                    "SortExec: [b@1 ASC]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                    "SortExec: [c@2 ASC]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                ],
+                // Should include 4 RepartitionExecs and 4 SortExecs
+                _ => vec![
+                    top_join_plan.as_str(),
+                    "SortExec: [b@1 ASC]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
+                    join_plan.as_str(),
+                    "SortExec: [a@0 ASC]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                    "SortExec: [b@1 ASC]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                    "SortExec: [c@2 ASC]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
+                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                ],
+            };
+            assert_optimized!(expected, top_join);
+        }
+
+        Ok(())
+    }
+
+    #[test]
+    fn smj_join_key_ordering() -> Result<()> {
+        // group by (a as a1, b as b1)
+        let left = aggregate_exec_with_alias(
+            parquet_exec(),
+            vec![
+                ("a".to_string(), "a1".to_string()),
+                ("b".to_string(), "b1".to_string()),
+            ],
+        );
+        // group by (b, a)
+        let right = aggregate_exec_with_alias(
+            parquet_exec(),
+            vec![
+                ("b".to_string(), "b".to_string()),
+                ("a".to_string(), "a".to_string()),
+            ],
+        );
+
+        // Join on (b1 == b && a1 == a)
+        let join_on = vec![
+            (
+                Column::new_with_schema("b1", &left.schema()).unwrap(),
+                Column::new_with_schema("b", &right.schema()).unwrap(),
+            ),
+            (
+                Column::new_with_schema("a1", &left.schema()).unwrap(),
+                Column::new_with_schema("a", &right.schema()).unwrap(),
+            ),
+        ];
+        let join = sort_merge_join_exec(left, right.clone(), &join_on, &JoinType::Inner);
+
+        // Only two RepartitionExecs and two SortExecs added
+        let expected = &[
+            "SortMergeJoin: join_type=Inner, on=[(Column { name: \"b1\", index: 1 }, Column { name: \"b\", index: 0 }), (Column { name: \"a1\", index: 0 }, Column { name: \"a\", index: 1 })]",
+            "SortExec: [b1@1 ASC,a1@0 ASC]",
+            "AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1, b1@1 as b1], aggr=[]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"a1\", index: 0 }, Column { name: \"b1\", index: 1 }], 10)",
+            "AggregateExec: mode=Partial, gby=[a@0 as a1, b@1 as b1], aggr=[]",
+            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+            "SortExec: [b@0 ASC,a@1 ASC]",
+            "AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 0 }, Column { name: \"a\", index: 1 }], 10)",
+            "AggregateExec: mode=Partial, gby=[b@1 as b, a@0 as a], aggr=[]",
+            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+        ];
+        assert_optimized!(expected, join);
+        Ok(())
+    }
+}
diff --git a/datafusion/core/src/physical_optimizer/merge_exec.rs b/datafusion/core/src/physical_optimizer/merge_exec.rs
index f614673f500e6..77fcce9d3b601 100644
--- a/datafusion/core/src/physical_optimizer/merge_exec.rs
+++ b/datafusion/core/src/physical_optimizer/merge_exec.rs
@@ -52,27 +52,21 @@ impl PhysicalOptimizerRule for AddCoalescePartitionsExec {
                 .iter()
                 .map(|child| self.optimize(child.clone(), config))
                 .collect::<Result<Vec<_>>>()?;
-            match plan.required_child_distribution() {
-                Distribution::UnspecifiedDistribution => {
-                    with_new_children_if_necessary(plan, children)
-                }
-                Distribution::HashPartitioned(_) => {
-                    with_new_children_if_necessary(plan, children)
-                }
-                Distribution::SinglePartition => with_new_children_if_necessary(
-                    plan,
-                    children
-                        .iter()
-                        .map(|child| {
-                            if child.output_partitioning().partition_count() == 1 {
-                                child.clone()
-                            } else {
-                                Arc::new(CoalescePartitionsExec::new(child.clone()))
-                            }
-                        })
-                        .collect(),
-                ),
-            }
+            assert_eq!(children.len(), plan.required_input_distribution().len());
+
+            let new_children = children
+                .into_iter()
+                .zip(plan.required_input_distribution())
+                .map(|(child, dist)| match dist {
+                    Distribution::SinglePartition
+                        if child.output_partitioning().partition_count() > 1 =>
+                    {
+                        Arc::new(CoalescePartitionsExec::new(child.clone()))
+                    }
+                    _ => child,
+                })
+                .collect::<Vec<_>>();
+            with_new_children_if_necessary(plan, new_children)
         }
     }
 
diff --git a/datafusion/core/src/physical_optimizer/mod.rs b/datafusion/core/src/physical_optimizer/mod.rs
index 55550bcd2cffc..5ecb9cd37a48d 100644
--- a/datafusion/core/src/physical_optimizer/mod.rs
+++ b/datafusion/core/src/physical_optimizer/mod.rs
@@ -20,6 +20,7 @@
 
 pub mod aggregate_statistics;
 pub mod coalesce_batches;
+pub mod enforcement;
 pub mod hash_build_probe_order;
 pub mod merge_exec;
 pub mod optimizer;
diff --git a/datafusion/core/src/physical_optimizer/repartition.rs b/datafusion/core/src/physical_optimizer/repartition.rs
index 1d2b259086839..720d40938b492 100644
--- a/datafusion/core/src/physical_optimizer/repartition.rs
+++ b/datafusion/core/src/physical_optimizer/repartition.rs
@@ -137,7 +137,7 @@ impl Repartition {
 ///
 /// 1. Has fewer partitions than `target_partitions`
 ///
-/// 2. Has a direct parent that `benefits_from_input_partitioning`
+/// 2. Has a direct parent that `prefer_parallel`
 ///
 /// 3. Does not have a parent that `relies_on_input_order` unless there
 /// is an intervening node that does not `maintain_input_order`
@@ -189,7 +189,7 @@ fn optimize_partitions(
                     target_partitions,
                     child.clone(),
                     can_reorder_children,
-                    plan.benefits_from_input_partitioning(),
+                    plan.prefer_parallel(),
                 )
             })
             .collect::<Result<_>>()?;
@@ -234,6 +234,7 @@ impl PhysicalOptimizerRule for Repartition {
         "repartition"
     }
 }
+
 #[cfg(test)]
 mod tests {
     use arrow::compute::SortOptions;
diff --git a/datafusion/core/src/physical_optimizer/utils.rs b/datafusion/core/src/physical_optimizer/utils.rs
index 4aceb776d7d5b..18fb7e2c63e11 100644
--- a/datafusion/core/src/physical_optimizer/utils.rs
+++ b/datafusion/core/src/physical_optimizer/utils.rs
@@ -45,3 +45,74 @@ pub fn optimize_children(
         with_new_children_if_necessary(plan, children)
     }
 }
+
+/// Runs `transform` on every child of `plan` (the callback decides whether the overall traversal is
+/// pre-order or post-order) and passes the results to `with_new_children_if_necessary`.
+fn map_children<F>(
+    plan: Arc<dyn ExecutionPlan>,
+    transform: F,
+) -> Result<Arc<dyn ExecutionPlan>>
+where
+    F: Fn(Arc<dyn ExecutionPlan>) -> Result<Arc<dyn ExecutionPlan>>,
+{
+    let children = plan.children();
+    if children.is_empty() {
+        return Ok(plan);
+    }
+    let new_children = children.into_iter().map(transform).collect::<Result<Vec<_>>>()?;
+    with_new_children_if_necessary(plan, new_children)
+}
+
+/// Convenience utility for writing optimizer rules: recursively applies the given `op` to the plan tree.
+/// When `op` does not apply to a given plan (it returns `None`), that plan is left unchanged.
+/// The traversal direction is that of `transform_down` (pre-order traversal).
+#[allow(dead_code)]
+pub fn transform<F>(
+    plan: Arc<dyn ExecutionPlan>,
+    op: &F,
+) -> Result<Arc<dyn ExecutionPlan>>
+where
+    F: Fn(Arc<dyn ExecutionPlan>) -> Option<Arc<dyn ExecutionPlan>>,
+{
+    transform_down(plan, op)
+}
+
+/// Convenience utility for writing optimizer rules: applies `op` to `plan` first and then recursively
+/// to all of its children (pre-order traversal). When `op` returns `None` for a plan, it is left unchanged.
+#[allow(dead_code)]
+pub fn transform_down<F>(
+    plan: Arc<dyn ExecutionPlan>,
+    op: &F,
+) -> Result<Arc<dyn ExecutionPlan>>
+where
+    F: Fn(Arc<dyn ExecutionPlan>) -> Option<Arc<dyn ExecutionPlan>>,
+{
+    // `op` consumes its argument, so give it a clone of the `Arc` and keep
+    // the original handle as the fallback for when the rule does not apply.
+    let after_op = op(plan.clone()).unwrap_or(plan);
+    // The children visited are those of the (possibly rewritten) node, and
+    // `after_op` is moved rather than cloned since it is not used afterwards.
+    map_children(after_op, |child: Arc<dyn ExecutionPlan>| {
+        transform_down(child, op)
+    })
+}
+
+/// Convenience utility for writing optimizer rules: recursively applies `op` to all children of `plan`
+/// first and then to `plan` itself (post-order traversal). When `op` returns `None`, the plan is left unchanged.
+pub fn transform_up<F>(
+    plan: Arc<dyn ExecutionPlan>,
+    op: &F,
+) -> Result<Arc<dyn ExecutionPlan>>
+where
+    F: Fn(Arc<dyn ExecutionPlan>) -> Option<Arc<dyn ExecutionPlan>>,
+{
+    // Rewrite the subtrees first so that `op` sees already-transformed children.
+    let with_new_children = map_children(plan, |child: Arc<dyn ExecutionPlan>| {
+        transform_up(child, op)
+    })?;
+
+    // `op` takes ownership of its argument, so pass a clone of the `Arc` and
+    // fall back to the original handle when the rule does not apply.
+    let rewritten = op(with_new_children.clone());
+    Ok(rewritten.unwrap_or(with_new_children))
+}
diff --git a/datafusion/core/src/physical_plan/aggregates/mod.rs b/datafusion/core/src/physical_plan/aggregates/mod.rs
index aaf03b5c2a809..46ee58b17187e 100644
--- a/datafusion/core/src/physical_plan/aggregates/mod.rs
+++ b/datafusion/core/src/physical_plan/aggregates/mod.rs
@@ -34,9 +34,12 @@ use datafusion_common::Result;
 use datafusion_expr::Accumulator;
 use datafusion_physical_expr::expressions::Column;
 use datafusion_physical_expr::{
-    expressions, AggregateExpr, PhysicalExpr, PhysicalSortExpr,
+    expressions, merge_equivalence_properties_with_alias,
+    normalize_out_expr_with_alias_schema, truncate_equivalence_properties_not_in_schema,
+    AggregateExpr, PhysicalExpr, PhysicalSortExpr,
 };
 use std::any::Any;
+use std::collections::HashMap;
 
 use std::sync::Arc;
 
@@ -163,6 +166,8 @@ pub struct AggregateExec {
     /// same as input.schema() but for the final aggregate it will be the same as the input
     /// to the partial aggregate
     input_schema: SchemaRef,
+    /// Maps an input column to its aliased output column(s); used to normalize output expressions such as Partitioning
+    alias_map: HashMap<Column, Vec<Column>>,
     /// Execution Metrics
     metrics: ExecutionPlanMetricsSet,
 }
@@ -186,6 +191,18 @@ impl AggregateExec {
 
         let schema = Arc::new(schema);
 
+        let mut alias_map: HashMap<Column, Vec<Column>> = HashMap::new();
+        for (expression, name) in group_by.expr.iter() {
+            if let Some(column) = expression.as_any().downcast_ref::<Column>() {
+                let new_col_idx = schema.index_of(name)?;
+                // A column whose name is unchanged but whose index differs is also treated as an alias
+                if (column.name() != name) || (column.index() != new_col_idx) {
+                    let entry = alias_map.entry(column.clone()).or_insert_with(Vec::new);
+                    entry.push(Column::new(name, new_col_idx));
+                }
+            };
+        }
+
         Ok(AggregateExec {
             mode,
             group_by,
@@ -193,6 +210,7 @@ impl AggregateExec {
             input,
             schema,
             input_schema,
+            alias_map,
             metrics: ExecutionPlanMetricsSet::new(),
         })
     }
@@ -255,25 +273,57 @@ impl ExecutionPlan for AggregateExec {
 
     /// Get the output partitioning of this plan
     fn output_partitioning(&self) -> Partitioning {
-        self.input.output_partitioning()
+        match &self.mode {
+            AggregateMode::Partial => {
+                // Partial aggregation does not change the output partitioning, but the hash expressions must respect the aliases
+                let input_partition = self.input.output_partitioning();
+                match input_partition {
+                    Partitioning::Hash(exprs, part) => {
+                        let normalized_exprs = exprs
+                            .into_iter()
+                            .map(|expr| {
+                                normalize_out_expr_with_alias_schema(
+                                    expr,
+                                    &self.alias_map,
+                                    &self.schema,
+                                )
+                            })
+                            .collect::<Vec<_>>();
+                        Partitioning::Hash(normalized_exprs, part)
+                    }
+                    _ => input_partition,
+                }
+            }
+            // Final Aggregation's output partitioning is the same as its real input
+            _ => self.input.output_partitioning(),
+        }
     }
 
     fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
         None
     }
 
-    fn required_child_distribution(&self) -> Distribution {
+    fn required_input_distribution(&self) -> Vec<Distribution> {
         match &self.mode {
-            AggregateMode::Partial => Distribution::UnspecifiedDistribution,
-            AggregateMode::FinalPartitioned => Distribution::HashPartitioned(
-                self.group_by.expr.iter().map(|x| x.0.clone()).collect(),
-            ),
-            AggregateMode::Final => Distribution::SinglePartition,
+            AggregateMode::Partial => vec![Distribution::UnspecifiedDistribution],
+            AggregateMode::FinalPartitioned => {
+                vec![Distribution::HashPartitioned(self.output_group_expr())]
+            }
+            AggregateMode::Final => vec![Distribution::SinglePartition],
         }
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        false
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        let mut input_equivalence_properties = self.input.equivalence_properties();
+        merge_equivalence_properties_with_alias(
+            &mut input_equivalence_properties,
+            &self.alias_map,
+        );
+        truncate_equivalence_properties_not_in_schema(
+            &mut input_equivalence_properties,
+            &self.schema,
+        );
+        input_equivalence_properties
     }
 
     fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
@@ -652,7 +702,7 @@ mod tests {
     use arrow::error::Result as ArrowResult;
     use arrow::record_batch::RecordBatch;
     use datafusion_common::{DataFusionError, Result, ScalarValue};
-    use datafusion_physical_expr::expressions::{lit, Count};
+    use datafusion_physical_expr::expressions::{lit, Column, Count};
     use datafusion_physical_expr::{AggregateExpr, PhysicalExpr, PhysicalSortExpr};
     use futures::{FutureExt, Stream};
     use std::any::Any;
@@ -922,6 +972,10 @@ mod tests {
             None
         }
 
+        fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+            vec![]
+        }
+
         fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
             vec![]
         }
diff --git a/datafusion/core/src/physical_plan/analyze.rs b/datafusion/core/src/physical_plan/analyze.rs
index 8134ee7d2f2da..eeb65cc21fe10 100644
--- a/datafusion/core/src/physical_plan/analyze.rs
+++ b/datafusion/core/src/physical_plan/analyze.rs
@@ -28,6 +28,7 @@ use crate::{
     },
 };
 use arrow::{array::StringBuilder, datatypes::SchemaRef, record_batch::RecordBatch};
+use datafusion_physical_expr::expressions::Column;
 use futures::StreamExt;
 
 use super::expressions::PhysicalSortExpr;
@@ -72,8 +73,8 @@ impl ExecutionPlan for AnalyzeExec {
     }
 
     /// Specifies we want the input as a single stream
-    fn required_child_distribution(&self) -> Distribution {
-        Distribution::SinglePartition
+    fn required_input_distribution(&self) -> Vec<Distribution> {
+        vec![Distribution::SinglePartition]
     }
 
     /// Get the output partitioning of this plan
@@ -85,8 +86,8 @@ impl ExecutionPlan for AnalyzeExec {
         None
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        false
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        vec![]
     }
 
     fn with_new_children(
diff --git a/datafusion/core/src/physical_plan/coalesce_batches.rs b/datafusion/core/src/physical_plan/coalesce_batches.rs
index 317500ddc904f..b8d690f2bbf1a 100644
--- a/datafusion/core/src/physical_plan/coalesce_batches.rs
+++ b/datafusion/core/src/physical_plan/coalesce_batches.rs
@@ -34,6 +34,7 @@ use arrow::compute::kernels::concat::concat;
 use arrow::datatypes::SchemaRef;
 use arrow::error::Result as ArrowResult;
 use arrow::record_batch::{RecordBatch, RecordBatchOptions};
+use datafusion_physical_expr::expressions::Column;
 use futures::stream::{Stream, StreamExt};
 use log::trace;
 
@@ -96,12 +97,15 @@ impl ExecutionPlan for CoalesceBatchesExec {
         self.input.output_partitioning()
     }
 
+    // Depending on how CoalesceBatches is implemented, it may be possible to keep
+    // the input ordering when combining small batches into larger batches.
+    // TODO revisit the logic later
     fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
         None
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        false
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        self.input.equivalence_properties()
     }
 
     fn with_new_children(
diff --git a/datafusion/core/src/physical_plan/coalesce_partitions.rs b/datafusion/core/src/physical_plan/coalesce_partitions.rs
index d1c797eacd5c9..e6378e2d33571 100644
--- a/datafusion/core/src/physical_plan/coalesce_partitions.rs
+++ b/datafusion/core/src/physical_plan/coalesce_partitions.rs
@@ -27,6 +27,7 @@ use tokio::sync::mpsc;
 
 use arrow::record_batch::RecordBatch;
 use arrow::{datatypes::SchemaRef, error::Result as ArrowResult};
+use datafusion_physical_expr::expressions::Column;
 
 use super::common::AbortOnDropMany;
 use super::expressions::PhysicalSortExpr;
@@ -87,8 +88,8 @@ impl ExecutionPlan for CoalescePartitionsExec {
         None
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        false
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        self.input.equivalence_properties()
     }
 
     fn with_new_children(
diff --git a/datafusion/core/src/physical_plan/cross_join.rs b/datafusion/core/src/physical_plan/cross_join.rs
index e3f25fc566abc..b1c26d494b5ab 100644
--- a/datafusion/core/src/physical_plan/cross_join.rs
+++ b/datafusion/core/src/physical_plan/cross_join.rs
@@ -41,6 +41,7 @@ use super::{
 };
 use crate::execution::context::TaskContext;
 use crate::physical_plan::join_utils::{OnceAsync, OnceFut};
+use datafusion_physical_expr::expressions::Column;
 use log::debug;
 
 /// Data of the left side
@@ -164,8 +165,28 @@ impl ExecutionPlan for CrossJoinExec {
         None
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        false
+    /// Combines the equivalence classes of both join inputs.
+    ///
+    /// Left-side classes are forwarded unchanged. Right-side classes are
+    /// re-indexed by the number of left-side fields.
+    /// NOTE(review): this assumes the joined schema is the left fields followed
+    /// by the right fields -- confirm against the schema built in the constructor.
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        let left_columns_len = self.left.schema().fields().len();
+        let right_properties = self
+            .right
+            .equivalence_properties()
+            .into_iter()
+            .map(|class| {
+                class
+                    .iter()
+                    .map(|col| Column::new(col.name(), col.index() + left_columns_len))
+                    .collect::<Vec<_>>()
+            });
+
+        let mut properties = self.left.equivalence_properties();
+        properties.extend(right_properties);
+        properties
     }
 
     fn execute(
diff --git a/datafusion/core/src/physical_plan/empty.rs b/datafusion/core/src/physical_plan/empty.rs
index c693764c87aa0..a8e17d7d854dc 100644
--- a/datafusion/core/src/physical_plan/empty.rs
+++ b/datafusion/core/src/physical_plan/empty.rs
@@ -22,11 +22,12 @@ use std::sync::Arc;
 
 use crate::error::{DataFusionError, Result};
 use crate::physical_plan::{
-    memory::MemoryStream, DisplayFormatType, Distribution, ExecutionPlan, Partitioning,
+    memory::MemoryStream, DisplayFormatType, ExecutionPlan, Partitioning,
 };
 use arrow::array::NullArray;
 use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
 use arrow::record_batch::RecordBatch;
+use datafusion_physical_expr::expressions::Column;
 use log::debug;
 
 use super::expressions::PhysicalSortExpr;
@@ -98,10 +99,6 @@ impl ExecutionPlan for EmptyExec {
         vec![]
     }
 
-    fn required_child_distribution(&self) -> Distribution {
-        Distribution::UnspecifiedDistribution
-    }
-
     /// Get the output partitioning of this plan
     fn output_partitioning(&self) -> Partitioning {
         Partitioning::UnknownPartitioning(self.partitions)
@@ -111,6 +108,10 @@ impl ExecutionPlan for EmptyExec {
         None
     }
 
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        vec![]
+    }
+
     fn with_new_children(
         self: Arc<Self>,
         _: Vec<Arc<dyn ExecutionPlan>>,
diff --git a/datafusion/core/src/physical_plan/explain.rs b/datafusion/core/src/physical_plan/explain.rs
index 29a17ee870db4..5cf854ba01244 100644
--- a/datafusion/core/src/physical_plan/explain.rs
+++ b/datafusion/core/src/physical_plan/explain.rs
@@ -29,6 +29,7 @@ use crate::{
     },
 };
 use arrow::{array::StringBuilder, datatypes::SchemaRef, record_batch::RecordBatch};
+use datafusion_physical_expr::expressions::Column;
 use log::debug;
 
 use super::{expressions::PhysicalSortExpr, SendableRecordBatchStream};
@@ -97,8 +98,8 @@ impl ExecutionPlan for ExplainExec {
         None
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        false
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        vec![]
     }
 
     fn with_new_children(
diff --git a/datafusion/core/src/physical_plan/file_format/avro.rs b/datafusion/core/src/physical_plan/file_format/avro.rs
index 0b7841d885d20..8da22a2ff3c4f 100644
--- a/datafusion/core/src/physical_plan/file_format/avro.rs
+++ b/datafusion/core/src/physical_plan/file_format/avro.rs
@@ -25,6 +25,7 @@ use arrow::datatypes::SchemaRef;
 
 use crate::execution::context::TaskContext;
 use crate::physical_plan::metrics::ExecutionPlanMetricsSet;
+use datafusion_physical_expr::expressions::Column;
 use std::any::Any;
 use std::sync::Arc;
 
@@ -76,8 +77,8 @@ impl ExecutionPlan for AvroExec {
         None
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        false
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        vec![]
     }
 
     fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
diff --git a/datafusion/core/src/physical_plan/file_format/csv.rs b/datafusion/core/src/physical_plan/file_format/csv.rs
index 885bea870bd82..38f0391b65015 100644
--- a/datafusion/core/src/physical_plan/file_format/csv.rs
+++ b/datafusion/core/src/physical_plan/file_format/csv.rs
@@ -32,6 +32,7 @@ use crate::physical_plan::{
 use arrow::csv;
 use arrow::datatypes::SchemaRef;
 use bytes::Buf;
+use datafusion_physical_expr::expressions::Column;
 use futures::{StreamExt, TryStreamExt};
 use object_store::{GetResult, ObjectStore};
 use std::any::Any;
@@ -99,14 +100,14 @@ impl ExecutionPlan for CsvExec {
         Partitioning::UnknownPartitioning(self.base_config.file_groups.len())
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        false
-    }
-
     fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
         None
     }
 
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        vec![]
+    }
+
     fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
         // this is a leaf node and has no children
         vec![]
diff --git a/datafusion/core/src/physical_plan/file_format/json.rs b/datafusion/core/src/physical_plan/file_format/json.rs
index 10f148ad060fa..3c229cd6be960 100644
--- a/datafusion/core/src/physical_plan/file_format/json.rs
+++ b/datafusion/core/src/physical_plan/file_format/json.rs
@@ -32,6 +32,7 @@ use crate::physical_plan::{
 use arrow::json::reader::DecoderOptions;
 use arrow::{datatypes::SchemaRef, json};
 use bytes::Buf;
+use datafusion_physical_expr::expressions::Column;
 use futures::{StreamExt, TryStreamExt};
 use object_store::{GetResult, ObjectStore};
 use std::any::Any;
@@ -83,8 +84,8 @@ impl ExecutionPlan for NdJsonExec {
         None
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        false
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        vec![]
     }
 
     fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
diff --git a/datafusion/core/src/physical_plan/file_format/parquet.rs b/datafusion/core/src/physical_plan/file_format/parquet.rs
index ff6507c7f65e4..6adef94092c70 100644
--- a/datafusion/core/src/physical_plan/file_format/parquet.rs
+++ b/datafusion/core/src/physical_plan/file_format/parquet.rs
@@ -256,8 +256,10 @@ impl ExecutionPlan for ParquetExec {
         None
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        false
+    fn equivalence_properties(
+        &self,
+    ) -> Vec<Vec<datafusion_physical_expr::expressions::Column>> {
+        vec![]
     }
 
     fn with_new_children(
diff --git a/datafusion/core/src/physical_plan/filter.rs b/datafusion/core/src/physical_plan/filter.rs
index b4e3edaee05fd..bc7c6fd8e9238 100644
--- a/datafusion/core/src/physical_plan/filter.rs
+++ b/datafusion/core/src/physical_plan/filter.rs
@@ -39,6 +39,11 @@ use arrow::record_batch::RecordBatch;
 use log::debug;
 
 use crate::execution::context::TaskContext;
+use datafusion_expr::Operator;
+use datafusion_physical_expr::expressions::{BinaryExpr, Column};
+use datafusion_physical_expr::{
+    combine_equivalence_properties, remove_equivalence_properties, split_predicate,
+};
 use futures::stream::{Stream, StreamExt};
 
 /// FilterExec evaluates a boolean predicate against all input batches to determine which rows to
@@ -113,8 +118,17 @@ impl ExecutionPlan for FilterExec {
         true
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        false
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        // Refine the input's properties with the column comparisons of the predicate.
+        let mut properties = self.input.equivalence_properties();
+        let (equal_pairs, ne_pairs) = collect_columns_from_predicate(&self.predicate);
+        equal_pairs
+            .into_iter()
+            .for_each(|pair| combine_equivalence_properties(&mut properties, pair));
+        ne_pairs
+            .into_iter()
+            .for_each(|pair| remove_equivalence_properties(&mut properties, pair));
+        properties
     }
 
     fn with_new_children(
@@ -231,6 +245,39 @@ impl RecordBatchStream for FilterExecStream {
     }
 }
 
+/// Return the equals Column-Pairs and Non-equals Column-Pairs of the split `predicate`
+fn collect_columns_from_predicate(predicate: &Arc<dyn PhysicalExpr>) -> EqualAndNonEqual {
+    let mut eq_predicate_columns: Vec<(&Column, &Column)> = Vec::new();
+    let mut ne_predicate_columns: Vec<(&Column, &Column)> = Vec::new();
+
+    let predicates = split_predicate(predicate);
+    predicates.into_iter().for_each(|p| {
+        if let Some(binary) = p.as_any().downcast_ref::<BinaryExpr>() {
+            // Only `column <op> column` comparisons contribute a pair.
+            if let (Some(left_column), Some(right_column)) = (
+                binary.left().as_any().downcast_ref::<Column>(),
+                binary.right().as_any().downcast_ref::<Column>(),
+            ) {
+                match binary.op() {
+                    Operator::Eq => {
+                        eq_predicate_columns.push((left_column, right_column))
+                    }
+                    Operator::NotEq => {
+                        ne_predicate_columns.push((left_column, right_column))
+                    }
+                    _ => {}
+                }
+            }
+        }
+    });
+
+    (eq_predicate_columns, ne_predicate_columns)
+}
+
+/// The equals Column-Pairs and Non-equals Column-Pairs in the Predicates
+pub type EqualAndNonEqual<'a> =
+    (Vec<(&'a Column, &'a Column)>, Vec<(&'a Column, &'a Column)>);
+
 #[cfg(test)]
 mod tests {
 
diff --git a/datafusion/core/src/physical_plan/hash_join.rs b/datafusion/core/src/physical_plan/hash_join.rs
index a22bcbc13c7e1..ca165ae97c620 100644
--- a/datafusion/core/src/physical_plan/hash_join.rs
+++ b/datafusion/core/src/physical_plan/hash_join.rs
@@ -78,9 +78,10 @@ use crate::arrow::array::BooleanBufferBuilder;
 use crate::arrow::datatypes::TimeUnit;
 use crate::execution::context::TaskContext;
 use crate::physical_plan::coalesce_batches::concat_batches;
-use crate::physical_plan::PhysicalExpr;
+use crate::physical_plan::{Distribution, PhysicalExpr};
 
 use crate::physical_plan::join_utils::{OnceAsync, OnceFut};
+use datafusion_physical_expr::combine_equivalence_properties;
 use log::debug;
 use std::cmp;
 use std::fmt;
@@ -283,6 +284,31 @@ impl ExecutionPlan for HashJoinExec {
         vec![self.left.clone(), self.right.clone()]
     }
 
+    fn required_input_distribution(&self) -> Vec<Distribution> {
+        match self.mode {
+            PartitionMode::CollectLeft => vec![
+                Distribution::SinglePartition,
+                Distribution::UnspecifiedDistribution,
+            ],
+            PartitionMode::Partitioned => {
+                let left_keys = self
+                    .on
+                    .iter()
+                    .map(|(l, _)| Arc::new(l.clone()) as Arc<dyn PhysicalExpr>)
+                    .collect();
+                let right_keys = self
+                    .on
+                    .iter()
+                    .map(|(_, r)| Arc::new(r.clone()) as Arc<dyn PhysicalExpr>)
+                    .collect();
+                vec![
+                    Distribution::HashPartitioned(left_keys),
+                    Distribution::HashPartitioned(right_keys),
+                ]
+            }
+        }
+    }
+
     fn with_new_children(
         self: Arc<Self>,
         children: Vec<Arc<dyn ExecutionPlan>>,
@@ -299,15 +325,33 @@ impl ExecutionPlan for HashJoinExec {
     }
 
     fn output_partitioning(&self) -> Partitioning {
-        self.right.output_partitioning()
+        // NOTE(review): `self.mode` is ignored here; confirm this holds for CollectLeft.
+        match self.join_type {
+            JoinType::Inner | JoinType::Left => self.left.output_partitioning(),
+            JoinType::Right => self.right.output_partitioning(),
+            _ => Partitioning::UnknownPartitioning(
+                self.right.output_partitioning().partition_count(),
+            ),
+        }
     }
 
+    // Output ordering might be kept for some cases.
+    // For example if it is inner join then the stream side order can be kept
     fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
         None
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        false
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        // Start from the equivalence properties of both inputs.
+        let mut properties = self.left.equivalence_properties();
+        properties.extend(self.right.equivalence_properties());
+        // Only an inner join additionally records its `on` column pairs.
+        if self.join_type == JoinType::Inner {
+            for (left_col, right_col) in &self.on {
+                combine_equivalence_properties(&mut properties, (left_col, right_col));
+            }
+        }
+        properties
     }
 
     fn execute(
@@ -318,6 +362,7 @@ impl ExecutionPlan for HashJoinExec {
         let on_left = self.on.iter().map(|on| on.0.clone()).collect::<Vec<_>>();
         let on_right = self.on.iter().map(|on| on.1.clone()).collect::<Vec<_>>();
 
+        //TODO fix this in distribution model, need to add shuffle
         let left_fut = match self.mode {
             PartitionMode::CollectLeft => self.left_fut.once(|| {
                 collect_left_input(
diff --git a/datafusion/core/src/physical_plan/limit.rs b/datafusion/core/src/physical_plan/limit.rs
index 322c21ff419cc..cb3c389b3b2ae 100644
--- a/datafusion/core/src/physical_plan/limit.rs
+++ b/datafusion/core/src/physical_plan/limit.rs
@@ -34,6 +34,7 @@ use arrow::compute::limit;
 use arrow::datatypes::SchemaRef;
 use arrow::error::Result as ArrowResult;
 use arrow::record_batch::RecordBatch;
+use datafusion_physical_expr::expressions::Column;
 
 use super::expressions::PhysicalSortExpr;
 use super::{
@@ -98,8 +99,8 @@ impl ExecutionPlan for GlobalLimitExec {
         vec![self.input.clone()]
     }
 
-    fn required_child_distribution(&self) -> Distribution {
-        Distribution::SinglePartition
+    fn required_input_distribution(&self) -> Vec<Distribution> {
+        vec![Distribution::SinglePartition]
     }
 
     /// Get the output partitioning of this plan
@@ -115,7 +116,7 @@ impl ExecutionPlan for GlobalLimitExec {
         true
     }
 
-    fn benefits_from_input_partitioning(&self) -> bool {
+    fn prefer_parallel(&self) -> bool {
         false
     }
 
@@ -123,6 +124,10 @@ impl ExecutionPlan for GlobalLimitExec {
         self.input.output_ordering()
     }
 
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        self.input.equivalence_properties()
+    }
+
     fn with_new_children(
         self: Arc<Self>,
         children: Vec<Arc<dyn ExecutionPlan>>,
@@ -277,18 +282,17 @@ impl ExecutionPlan for LocalLimitExec {
         self.input.output_ordering().is_some()
     }
 
-    fn benefits_from_input_partitioning(&self) -> bool {
+    fn prefer_parallel(&self) -> bool {
         false
     }
 
-    // Local limit does not make any attempt to maintain the input
-    // sortedness (if there is more than one partition)
+    // Local limit will not change the input plan's ordering
     fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
-        if self.output_partitioning().partition_count() == 1 {
-            self.input.output_ordering()
-        } else {
-            None
-        }
+        self.input.output_ordering()
+    }
+
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        self.input.equivalence_properties()
     }
 
     fn with_new_children(
diff --git a/datafusion/core/src/physical_plan/memory.rs b/datafusion/core/src/physical_plan/memory.rs
index d2dbe0d738c39..3b33fa4fed5a5 100644
--- a/datafusion/core/src/physical_plan/memory.rs
+++ b/datafusion/core/src/physical_plan/memory.rs
@@ -34,6 +34,7 @@ use arrow::record_batch::RecordBatch;
 
 use crate::execution::context::TaskContext;
 use datafusion_common::DataFusionError;
+use datafusion_physical_expr::expressions::Column;
 use futures::Stream;
 
 /// Execution plan for reading in-memory batches of data
@@ -81,8 +82,8 @@ impl ExecutionPlan for MemoryExec {
         None
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        false
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        vec![]
     }
 
     fn with_new_children(
diff --git a/datafusion/core/src/physical_plan/mod.rs b/datafusion/core/src/physical_plan/mod.rs
index 4613be7cdcbcc..33007387c53eb 100644
--- a/datafusion/core/src/physical_plan/mod.rs
+++ b/datafusion/core/src/physical_plan/mod.rs
@@ -41,6 +41,7 @@ use datafusion_common::DataFusionError;
 use std::sync::Arc;
 use std::task::{Context, Poll};
 use std::{any::Any, pin::Pin};
+use tokio::macros::support::thread_rng_n;
 
 /// Trait for types that stream [arrow::record_batch::RecordBatch]
 pub trait RecordBatchStream: Stream<Item = ArrowResult<RecordBatch>> {
@@ -85,6 +86,73 @@ impl Stream for EmptyRecordBatchStream {
     }
 }
 
+/// CombinedRecordBatchStream can be used to combine a Vec of SendableRecordBatchStreams into one
+pub struct CombinedRecordBatchStream {
+    /// Schema wrapped by Arc
+    schema: SchemaRef,
+    /// Stream entries
+    entries: Vec<SendableRecordBatchStream>,
+}
+
+impl CombinedRecordBatchStream {
+    /// Create a CombinedRecordBatchStream
+    pub fn new(schema: SchemaRef, entries: Vec<SendableRecordBatchStream>) -> Self {
+        Self { schema, entries }
+    }
+}
+
+impl RecordBatchStream for CombinedRecordBatchStream {
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+}
+
+impl Stream for CombinedRecordBatchStream {
+    type Item = ArrowResult<RecordBatch>;
+
+    fn poll_next(
+        mut self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+    ) -> Poll<Option<Self::Item>> {
+        use Poll::*;
+
+        let start = thread_rng_n(self.entries.len() as u32) as usize;
+        let mut idx = start;
+
+        for _ in 0..self.entries.len() {
+            let stream = self.entries.get_mut(idx).unwrap();
+
+            match Pin::new(stream).poll_next(cx) {
+                Ready(Some(val)) => return Ready(Some(val)),
+                Ready(None) => {
+                    // Remove the entry
+                    self.entries.swap_remove(idx);
+
+                    // Check if this was the last entry, if so the cursor needs
+                    // to wrap
+                    if idx == self.entries.len() {
+                        idx = 0;
+                    } else if idx < start && start <= self.entries.len() {
+                        // The stream being swapped into the current index has
+                        // already been polled, so skip it.
+                        idx = idx.wrapping_add(1) % self.entries.len();
+                    }
+                }
+                Pending => {
+                    idx = idx.wrapping_add(1) % self.entries.len();
+                }
+            }
+        }
+
+        // If no entries remain, then the stream is complete.
+        if self.entries.is_empty() {
+            Ready(None)
+        } else {
+            Pending
+        }
+    }
+}
+
 /// Physical planner interface
 pub use self::planner::PhysicalPlanner;
 
@@ -139,6 +207,8 @@ pub trait ExecutionPlan: Debug + Send + Sync {
     /// Specifies the output partitioning scheme of this plan
     fn output_partitioning(&self) -> Partitioning;
 
+    /// Describe how data is ordered in each partition.
+    ///
     /// If the output of this operator is sorted, returns `Some(keys)`
     /// with the description of how it was sorted.
     ///
@@ -151,10 +221,20 @@ pub trait ExecutionPlan: Debug + Send + Sync {
     /// have any particular output order here
     fn output_ordering(&self) -> Option<&[PhysicalSortExpr]>;
 
-    /// Specifies the data distribution requirements of all the
-    /// children for this operator
-    fn required_child_distribution(&self) -> Distribution {
-        Distribution::UnspecifiedDistribution
+    /// Specifies the data distribution requirements for all the children of this
+    /// operator. By default it's [`Distribution::UnspecifiedDistribution`] for each
+    /// child, and a single such entry when the plan has no children.
+    fn required_input_distribution(&self) -> Vec<Distribution> {
+        // `children()` returns an owned `Vec`, so ask for it only once. A plan without
+        // children still gets one entry.
+        let entries = self.children().len().max(1);
+        vec![Distribution::UnspecifiedDistribution; entries]
+    }
+
+    /// Specifies the ordering requirements for all the
+    /// children for this operator.
+    fn required_input_ordering(&self) -> Vec<Option<&[PhysicalSortExpr]>> {
+        vec![None; self.children().len()]
     }
 
     /// Returns `true` if this operator relies on its inputs being
@@ -165,13 +245,17 @@ pub trait ExecutionPlan: Debug + Send + Sync {
     /// optimizations which might reorder the inputs (such as
     /// repartitioning to increase concurrency).
     ///
-    /// The default implementation returns `true`
+    /// The default implementation checks the input ordering requirements
+    /// and if there is non empty ordering requirements to the input, the method will
+    /// return `true`.
     ///
     /// WARNING: if you override this default and return `false`, your
     /// operator can not rely on DataFusion preserving the input order
     /// as it will likely not.
     fn relies_on_input_order(&self) -> bool {
-        true
+        self.required_input_ordering()
+            .into_iter()
+            .any(|ordering| ordering.is_some())
     }
 
     /// Returns `false` if this operator's implementation may reorder
@@ -201,15 +285,18 @@ pub trait ExecutionPlan: Debug + Send + Sync {
     ///
     /// The default implementation returns `true` unless this operator
     /// has signalled it requires a single child input partition.
-    fn benefits_from_input_partitioning(&self) -> bool {
+    fn prefer_parallel(&self) -> bool {
         // By default try to maximize parallelism with more CPUs if
-        // possible
-        !matches!(
-            self.required_child_distribution(),
-            Distribution::SinglePartition
-        )
+        // possible
+        !self
+            .required_input_distribution()
+            .into_iter()
+            .any(|dist| matches!(dist, Distribution::SinglePartition))
     }
 
+    /// Get a list of equivalence properties within the plan
+    fn equivalence_properties(&self) -> Vec<Vec<Column>>;
+
     /// Get a list of child execution plans that provide the input for this plan. The returned list
     /// will be empty for leaf nodes, will contain a single value for unary nodes, or two
     /// values for binary nodes (such as joins).
@@ -487,6 +574,82 @@ impl Partitioning {
             RoundRobinBatch(n) | Hash(_, n) | UnknownPartitioning(n) => *n,
         }
     }
+
+    /// Returns true when the guarantees made by this [`Partitioning`] are sufficient to
+    /// satisfy the partitioning scheme mandated by the `required` [`Distribution`].
+    ///
+    /// * [`Distribution::UnspecifiedDistribution`] is satisfied by any partitioning.
+    /// * [`Distribution::SinglePartition`] is satisfied only when there is exactly one
+    ///   partition.
+    /// * [`Distribution::HashPartitioned`] is satisfied only by [`Partitioning::Hash`]
+    ///   whose expressions equal the required ones in any order, either directly or
+    ///   after both lists were normalized with the equivalence properties.
+    ///
+    /// `equal_properties` is called lazily: only when the direct comparison of the
+    /// hash expressions fails.
+    pub fn satisfy<F: FnOnce() -> Vec<Vec<Column>>>(
+        &self,
+        required: Distribution,
+        equal_properties: F,
+    ) -> bool {
+        let required_exprs = match required {
+            Distribution::UnspecifiedDistribution => return true,
+            Distribution::SinglePartition => return self.partition_count() == 1,
+            Distribution::HashPartitioned(exprs) => exprs,
+        };
+        // Here we do not check the partition count for hash partitioning and assumes the
+        // partition count and hash functions in the system are the same. In future if we
+        // plan to support storage partition-wise joins, then we need to have the partition
+        // count and hash functions validation.
+        let partition_exprs = match self {
+            Partitioning::Hash(exprs, _) => exprs,
+            _ => return false,
+        };
+
+        // Fast path: the expressions already match as written.
+        if expr_list_eq_any_order(&required_exprs, partition_exprs) {
+            return true;
+        }
+
+        // If the required exprs do not match, need to leverage the eq_properties provided
+        // by the child and normalize both exprs based on the eq_properties.
+        let eq_properties = equal_properties();
+        if eq_properties.is_empty() {
+            // Nothing to normalize with, so the mismatch found above is final.
+            return false;
+        }
+
+        let normalized_required_exprs = required_exprs
+            .iter()
+            .map(|e| {
+                normalize_expr_with_equivalence_properties(e.clone(), &eq_properties)
+            })
+            .collect::<Vec<_>>();
+        let normalized_partition_exprs = partition_exprs
+            .iter()
+            .map(|e| {
+                normalize_expr_with_equivalence_properties(e.clone(), &eq_properties)
+            })
+            .collect::<Vec<_>>();
+        expr_list_eq_any_order(&normalized_required_exprs, &normalized_partition_exprs)
+    }
+}
+
+impl PartialEq for Partitioning {
+    /// Equal only for two `RoundRobinBatch` values with the same partition count, or
+    /// two `Hash` values with the same partition count and expression lists that are
+    /// equal in any order; anything else (e.g. `UnknownPartitioning`) is not equal.
+    fn eq(&self, other: &Partitioning) -> bool {
+        match (self, other) {
+            (Partitioning::RoundRobinBatch(n1), Partitioning::RoundRobinBatch(n2)) => {
+                n1 == n2
+            }
+            (Partitioning::Hash(exprs1, n1), Partitioning::Hash(exprs2, n2)) => {
+                expr_list_eq_any_order(exprs1, exprs2) && n1 == n2
+            }
+            _ => false,
+        }
+    }
 }
 
 /// Distribution schemes
@@ -501,6 +664,21 @@ pub enum Distribution {
     HashPartitioned(Vec<Arc<dyn PhysicalExpr>>),
 }
 
+impl Distribution {
+    /// Builds a [`Partitioning`] with which this [`Distribution`] is satisfied.
+    pub fn create_partitioning(&self, partition_count: usize) -> Partitioning {
+        match self {
+            Self::UnspecifiedDistribution => {
+                Partitioning::UnknownPartitioning(partition_count)
+            }
+            Self::SinglePartition => Partitioning::UnknownPartitioning(1),
+            Self::HashPartitioned(exprs) => {
+                Partitioning::Hash(exprs.clone(), partition_count)
+            }
+        }
+    }
+}
+
 pub use datafusion_physical_expr::window::WindowExpr;
 pub use datafusion_physical_expr::{AggregateExpr, PhysicalExpr};
 
@@ -573,4 +751,8 @@ pub mod values;
 pub mod windows;
 
 use crate::execution::context::TaskContext;
+use datafusion_physical_expr::expressions::Column;
+use datafusion_physical_expr::{
+    expr_list_eq_any_order, normalize_expr_with_equivalence_properties,
+};
 pub use datafusion_physical_expr::{expressions, functions, type_coercion, udf};
diff --git a/datafusion/core/src/physical_plan/planner.rs b/datafusion/core/src/physical_plan/planner.rs
index 8b247a4521356..da380323610ec 100644
--- a/datafusion/core/src/physical_plan/planner.rs
+++ b/datafusion/core/src/physical_plan/planner.rs
@@ -522,8 +522,8 @@ impl DefaultPhysicalPlanner {
                         && session_state.config.target_partitions > 1
                         && session_state.config.repartition_windows;
 
-                    let input_exec = if can_repartition {
-                        let partition_keys = partition_keys
+                    let physical_partition_keys = if can_repartition {
+                        partition_keys
                             .iter()
                             .map(|e| {
                                 self.create_physical_expr(
@@ -533,19 +533,11 @@ impl DefaultPhysicalPlanner {
                                     session_state,
                                 )
                             })
-                            .collect::<Result<Vec<Arc<dyn PhysicalExpr>>>>()?;
-                        Arc::new(RepartitionExec::try_new(
-                            input_exec,
-                            Partitioning::Hash(
-                                partition_keys,
-                                session_state.config.target_partitions,
-                            ),
-                        )?)
+                            .collect::<Result<Vec<Arc<dyn PhysicalExpr>>>>()?
                     } else {
-                        input_exec
+                        vec![]
                     };
 
-                    // add a sort phase
                     let get_sort_keys = |expr: &Expr| match expr {
                         Expr::WindowFunction {
                             ref partition_by,
@@ -566,8 +558,8 @@ impl DefaultPhysicalPlanner {
 
                     let logical_input_schema = input.schema();
 
-                    let input_exec = if sort_keys.is_empty() {
-                        input_exec
+                    let physical_sort_keys = if sort_keys.is_empty() {
+                        None
                     } else {
                         let physical_input_schema = input_exec.schema();
                         let sort_keys = sort_keys
@@ -590,11 +582,7 @@ impl DefaultPhysicalPlanner {
                                 _ => unreachable!(),
                             })
                             .collect::<Result<Vec<_>>>()?;
-                        Arc::new(if can_repartition {
-                            SortExec::new_with_partitioning(sort_keys, input_exec, true, None)
-                        } else {
-                            SortExec::try_new(sort_keys, input_exec, None)?
-                        })
+                        Some(sort_keys)
                     };
 
                     let physical_input_schema = input_exec.schema();
@@ -614,6 +602,8 @@ impl DefaultPhysicalPlanner {
                         window_expr,
                         input_exec,
                         physical_input_schema,
+                        physical_partition_keys,
+                        physical_sort_keys,
                     )?) )
                 }
                 LogicalPlan::Aggregate(Aggregate {
@@ -664,16 +654,8 @@ impl DefaultPhysicalPlanner {
                         Arc<dyn ExecutionPlan>,
                         AggregateMode,
                     ) = if can_repartition {
-                        // Divide partial hash aggregates into multiple partitions by hash key
-                        let hash_repartition = Arc::new(RepartitionExec::try_new(
-                            initial_aggr,
-                            Partitioning::Hash(
-                                final_group.clone(),
-                                session_state.config.target_partitions,
-                            ),
-                        )?);
-                        // Combine hash aggregates within the partition
-                        (hash_repartition, AggregateMode::FinalPartitioned)
+                        // construct a second aggregation with 'AggregateMode::FinalPartitioned'
+                        (initial_aggr, AggregateMode::FinalPartitioned)
                     } else {
                         // construct a second aggregation, keeping the final column name equal to the
                         // first aggregation and the expressions corresponding to the respective aggregate
@@ -943,32 +925,10 @@ impl DefaultPhysicalPlanner {
                     if session_state.config.target_partitions > 1
                         && session_state.config.repartition_joins
                     {
-                        let (left_expr, right_expr) = join_on
-                            .iter()
-                            .map(|(l, r)| {
-                                (
-                                    Arc::new(l.clone()) as Arc<dyn PhysicalExpr>,
-                                    Arc::new(r.clone()) as Arc<dyn PhysicalExpr>,
-                                )
-                            })
-                            .unzip();
-
                         // Use hash partition by default to parallelize hash joins
                         Ok(Arc::new(HashJoinExec::try_new(
-                            Arc::new(RepartitionExec::try_new(
-                                physical_left,
-                                Partitioning::Hash(
-                                    left_expr,
-                                    session_state.config.target_partitions,
-                                ),
-                            )?),
-                            Arc::new(RepartitionExec::try_new(
-                                physical_right,
-                                Partitioning::Hash(
-                                    right_expr,
-                                    session_state.config.target_partitions,
-                                ),
-                            )?),
+                            physical_left,
+                            physical_right,
                             join_on,
                             join_filter,
                             join_type,
@@ -2275,8 +2235,8 @@ mod tests {
             None
         }
 
-        fn relies_on_input_order(&self) -> bool {
-            false
+        fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+            vec![]
         }
 
         fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
diff --git a/datafusion/core/src/physical_plan/projection.rs b/datafusion/core/src/physical_plan/projection.rs
index 5fa3c93cdd421..96afd26517d23 100644
--- a/datafusion/core/src/physical_plan/projection.rs
+++ b/datafusion/core/src/physical_plan/projection.rs
@@ -21,7 +21,7 @@
 //! projection expressions. `SELECT` without `FROM` will only evaluate expressions.
 
 use std::any::Any;
-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, HashMap};
 use std::pin::Pin;
 use std::sync::Arc;
 use std::task::{Context, Poll};
@@ -39,6 +39,10 @@ use super::expressions::{Column, PhysicalSortExpr};
 use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet};
 use super::{RecordBatchStream, SendableRecordBatchStream, Statistics};
 use crate::execution::context::TaskContext;
+use datafusion_physical_expr::{
+    merge_equivalence_properties_with_alias, normalize_out_expr_with_alias_schema,
+    truncate_equivalence_properties_not_in_schema,
+};
 use futures::stream::Stream;
 use futures::stream::StreamExt;
 
@@ -47,6 +51,8 @@ use futures::stream::StreamExt;
 pub struct ProjectionExec {
     /// The projection expressions stored as tuples of (expression, output column name)
     expr: Vec<(Arc<dyn PhysicalExpr>, String)>,
+    /// The alias map used to normalize out expressions like Partitioning and PhysicalSortExpr
+    alias_map: HashMap<Column, Vec<Column>>,
     /// The schema once the projection has been applied to the input
     schema: SchemaRef,
     /// The input plan
@@ -82,8 +88,21 @@ impl ProjectionExec {
             input_schema.metadata().clone(),
         ));
 
+        let mut alias_map: HashMap<Column, Vec<Column>> = HashMap::new();
+        for (expression, name) in expr.iter() {
+            if let Some(column) = expression.as_any().downcast_ref::<Column>() {
+                let new_col_idx = schema.index_of(name)?;
+                // When the column name is the same but the index differs, treat it as an alias
+                if (column.name() != name) || (column.index() != new_col_idx) {
+                    let entry = alias_map.entry(column.clone()).or_insert_with(Vec::new);
+                    entry.push(Column::new(name, new_col_idx));
+                }
+            };
+        }
+
         Ok(Self {
             expr,
+            alias_map,
             schema,
             input: input.clone(),
             metrics: ExecutionPlanMetricsSet::new(),
@@ -118,10 +137,28 @@ impl ExecutionPlan for ProjectionExec {
 
     /// Get the output partitioning of this plan
     fn output_partitioning(&self) -> Partitioning {
-        self.input.output_partitioning()
+        // The output partitioning needs to respect the aliases
+        let input_partition = self.input.output_partitioning();
+        match input_partition {
+            Partitioning::Hash(exprs, part) => {
+                let normalized_exprs = exprs
+                    .into_iter()
+                    .map(|expr| {
+                        normalize_out_expr_with_alias_schema(
+                            expr,
+                            &self.alias_map,
+                            &self.schema,
+                        )
+                    })
+                    .collect::<Vec<_>>();
+                Partitioning::Hash(normalized_exprs, part)
+            }
+            _ => input_partition,
+        }
     }
 
     fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
+        // TODO: the output ordering needs to respect the aliases
         self.input.output_ordering()
     }
 
@@ -130,8 +167,21 @@ impl ExecutionPlan for ProjectionExec {
         true
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        false
+    // Equivalence properties need to be adjusted after the Projection:
+    // 1) Add aliases. An alias can introduce additional equivalence properties,
+    //    for example: Projection(a, a as a1, a as a2)
+    // 2) Truncate the properties that are not in the schema of the Projection
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        let mut input_equivalence_properties = self.input.equivalence_properties();
+        merge_equivalence_properties_with_alias(
+            &mut input_equivalence_properties,
+            &self.alias_map,
+        );
+        truncate_equivalence_properties_not_in_schema(
+            &mut input_equivalence_properties,
+            &self.schema,
+        );
+        input_equivalence_properties
     }
 
     fn with_new_children(
diff --git a/datafusion/core/src/physical_plan/repartition.rs b/datafusion/core/src/physical_plan/repartition.rs
index 5611989f0ea9c..d9deb5c9140b3 100644
--- a/datafusion/core/src/physical_plan/repartition.rs
+++ b/datafusion/core/src/physical_plan/repartition.rs
@@ -39,6 +39,7 @@ use super::metrics::{self, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet};
 use super::{RecordBatchStream, SendableRecordBatchStream};
 
 use crate::execution::context::TaskContext;
+use datafusion_physical_expr::expressions::Column;
 use datafusion_physical_expr::PhysicalExpr;
 use futures::stream::Stream;
 use futures::StreamExt;
@@ -272,10 +273,6 @@ impl ExecutionPlan for RepartitionExec {
         vec![self.input.clone()]
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        false
-    }
-
     fn with_new_children(
         self: Arc<Self>,
         children: Vec<Arc<dyn ExecutionPlan>>,
@@ -294,6 +291,10 @@ impl ExecutionPlan for RepartitionExec {
         None
     }
 
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        self.input.equivalence_properties()
+    }
+
     fn execute(
         &self,
         partition: usize,
diff --git a/datafusion/core/src/physical_plan/sort_merge_join.rs b/datafusion/core/src/physical_plan/sort_merge_join.rs
index d84ea9a52a454..e878234c8198c 100644
--- a/datafusion/core/src/physical_plan/sort_merge_join.rs
+++ b/datafusion/core/src/physical_plan/sort_merge_join.rs
@@ -33,6 +33,7 @@ use arrow::compute::{take, SortOptions};
 use arrow::datatypes::{DataType, SchemaRef, TimeUnit};
 use arrow::error::{ArrowError, Result as ArrowResult};
 use arrow::record_batch::RecordBatch;
+use datafusion_physical_expr::{combine_equivalence_properties, PhysicalExpr};
 use futures::{Stream, StreamExt};
 
 use crate::error::DataFusionError;
@@ -45,8 +46,8 @@ use crate::physical_plan::expressions::PhysicalSortExpr;
 use crate::physical_plan::join_utils::{build_join_schema, check_join_is_valid, JoinOn};
 use crate::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricBuilder, MetricsSet};
 use crate::physical_plan::{
-    metrics, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream,
-    SendableRecordBatchStream, Statistics,
+    metrics, DisplayFormatType, Distribution, ExecutionPlan, Partitioning,
+    RecordBatchStream, SendableRecordBatchStream, Statistics,
 };
 
 /// join execution plan executes partitions in parallel and combines them into a set of
@@ -65,6 +66,10 @@ pub struct SortMergeJoinExec {
     schema: SchemaRef,
     /// Execution metrics
     metrics: ExecutionPlanMetricsSet,
+    /// The left SortExpr
+    left_sort_exprs: Vec<PhysicalSortExpr>,
+    /// The right SortExpr
+    right_sort_exprs: Vec<PhysicalSortExpr>,
     /// Sort options of join columns used in sorting left and right execution plans
     sort_options: Vec<SortOptions>,
     /// If null_equals_null is true, null == null else null != null
@@ -96,6 +101,34 @@ impl SortMergeJoinExec {
             )));
         }
 
+        let (left_expr, right_expr): (Vec<_>, Vec<_>) = on
+            .iter()
+            .map(|(l, r)| {
+                (
+                    Arc::new(l.clone()) as Arc<dyn PhysicalExpr>,
+                    Arc::new(r.clone()) as Arc<dyn PhysicalExpr>,
+                )
+            })
+            .unzip();
+
+        let left_sort_exprs = left_expr
+            .into_iter()
+            .zip(sort_options.iter())
+            .map(|(k, sort_op)| PhysicalSortExpr {
+                expr: k,
+                options: *sort_op,
+            })
+            .collect::<Vec<_>>();
+
+        let right_sort_exprs = right_expr
+            .into_iter()
+            .zip(sort_options.iter())
+            .map(|(k, sort_op)| PhysicalSortExpr {
+                expr: k,
+                options: *sort_op,
+            })
+            .collect::<Vec<_>>();
+
         let schema =
             Arc::new(build_join_schema(&left_schema, &right_schema, &join_type).0);
 
@@ -106,6 +139,8 @@ impl SortMergeJoinExec {
             join_type,
             schema,
             metrics: ExecutionPlanMetricsSet::new(),
+            left_sort_exprs,
+            right_sort_exprs,
             sort_options,
             null_equals_null,
         })
@@ -122,7 +157,14 @@ impl ExecutionPlan for SortMergeJoinExec {
     }
 
     fn output_partitioning(&self) -> Partitioning {
-        self.right.output_partitioning()
+        match self.join_type {
+            JoinType::Inner => self.left.output_partitioning(),
+            JoinType::Left => self.left.output_partitioning(),
+            JoinType::Right => self.right.output_partitioning(),
+            _ => Partitioning::UnknownPartitioning(
+                self.right.output_partitioning().partition_count(),
+            ),
+        }
     }
 
     fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
@@ -135,8 +177,38 @@ impl ExecutionPlan for SortMergeJoinExec {
         }
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        true
+    fn required_input_distribution(&self) -> Vec<Distribution> {
+        let (left_expr, right_expr) = self
+            .on
+            .iter()
+            .map(|(l, r)| {
+                (
+                    Arc::new(l.clone()) as Arc<dyn PhysicalExpr>,
+                    Arc::new(r.clone()) as Arc<dyn PhysicalExpr>,
+                )
+            })
+            .unzip();
+        vec![
+            Distribution::HashPartitioned(left_expr),
+            Distribution::HashPartitioned(right_expr),
+        ]
+    }
+
+    fn required_input_ordering(&self) -> Vec<Option<&[PhysicalSortExpr]>> {
+        vec![Some(&self.left_sort_exprs), Some(&self.right_sort_exprs)]
+    }
+
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        let mut left_properties = self.left.equivalence_properties();
+        let right_properties = self.right.equivalence_properties();
+        left_properties.extend(right_properties);
+
+        if self.join_type == JoinType::Inner {
+            self.on.iter().for_each(|(column1, column2)| {
+                combine_equivalence_properties(&mut left_properties, (column1, column2))
+            })
+        }
+        left_properties
     }
 
     fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
@@ -217,8 +289,8 @@ impl ExecutionPlan for SortMergeJoinExec {
             DisplayFormatType::Default => {
                 write!(
                     f,
-                    "SortMergeJoin: join_type={:?}, on={:?}, schema={:?}",
-                    self.join_type, self.on, &self.schema
+                    "SortMergeJoin: join_type={:?}, on={:?}",
+                    self.join_type, self.on
                 )
             }
         }
diff --git a/datafusion/core/src/physical_plan/sorts/sort.rs b/datafusion/core/src/physical_plan/sorts/sort.rs
index fb9bb10a38ec9..f5abdbc2fa4d7 100644
--- a/datafusion/core/src/physical_plan/sorts/sort.rs
+++ b/datafusion/core/src/physical_plan/sorts/sort.rs
@@ -46,6 +46,7 @@ use arrow::error::{ArrowError, Result as ArrowResult};
 use arrow::ipc::reader::FileReader;
 use arrow::record_batch::RecordBatch;
 use async_trait::async_trait;
+use datafusion_physical_expr::expressions::Column;
 use futures::lock::Mutex;
 use futures::{Stream, StreamExt, TryFutureExt, TryStreamExt};
 use log::{debug, error};
@@ -736,11 +737,13 @@ impl ExecutionPlan for SortExec {
         }
     }
 
-    fn required_child_distribution(&self) -> Distribution {
+    fn required_input_distribution(&self) -> Vec<Distribution> {
         if self.preserve_partitioning {
-            Distribution::UnspecifiedDistribution
+            vec![Distribution::UnspecifiedDistribution]
         } else {
-            Distribution::SinglePartition
+            // global sort
+            // TODO support RangePartition and OrderedDistribution
+            vec![Distribution::SinglePartition]
         }
     }
 
@@ -748,12 +751,7 @@ impl ExecutionPlan for SortExec {
         vec![self.input.clone()]
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        // this operator resorts everything
-        false
-    }
-
-    fn benefits_from_input_partitioning(&self) -> bool {
+    fn prefer_parallel(&self) -> bool {
         false
     }
 
@@ -761,6 +759,10 @@ impl ExecutionPlan for SortExec {
         Some(&self.expr)
     }
 
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        self.input.equivalence_properties()
+    }
+
     fn with_new_children(
         self: Arc<Self>,
         children: Vec<Arc<dyn ExecutionPlan>>,
diff --git a/datafusion/core/src/physical_plan/sorts/sort_preserving_merge.rs b/datafusion/core/src/physical_plan/sorts/sort_preserving_merge.rs
index 5db3c50e6c141..c003ba788e80e 100644
--- a/datafusion/core/src/physical_plan/sorts/sort_preserving_merge.rs
+++ b/datafusion/core/src/physical_plan/sorts/sort_preserving_merge.rs
@@ -35,6 +35,7 @@ use arrow::{
     error::{ArrowError, Result as ArrowResult},
     record_batch::RecordBatch,
 };
+use datafusion_physical_expr::expressions::Column;
 use futures::stream::{Fuse, FusedStream};
 use futures::{Stream, StreamExt};
 use tokio::sync::mpsc;
@@ -122,18 +123,22 @@ impl ExecutionPlan for SortPreservingMergeExec {
         Partitioning::UnknownPartitioning(1)
     }
 
-    fn required_child_distribution(&self) -> Distribution {
-        Distribution::UnspecifiedDistribution
+    fn required_input_distribution(&self) -> Vec<Distribution> {
+        vec![Distribution::UnspecifiedDistribution]
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        true
+    fn required_input_ordering(&self) -> Vec<Option<&[PhysicalSortExpr]>> {
+        vec![Some(&self.expr)]
     }
 
     fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
         Some(&self.expr)
     }
 
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        self.input.equivalence_properties()
+    }
+
     fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
         vec![self.input.clone()]
     }
diff --git a/datafusion/core/src/physical_plan/union.rs b/datafusion/core/src/physical_plan/union.rs
index bf9dfbd1b694c..ec297a4069f16 100644
--- a/datafusion/core/src/physical_plan/union.rs
+++ b/datafusion/core/src/physical_plan/union.rs
@@ -27,9 +27,11 @@ use arrow::{
     datatypes::{Field, Schema, SchemaRef},
     record_batch::RecordBatch,
 };
+use datafusion_physical_expr::expressions::Column;
+use datafusion_physical_expr::sort_expr_list_eq_strict_order;
 use futures::StreamExt;
 use itertools::Itertools;
-use log::debug;
+use log::{debug, warn};
 
 use super::{
     expressions::PhysicalSortExpr,
@@ -38,6 +40,7 @@ use super::{
     SendableRecordBatchStream, Statistics,
 };
 use crate::execution::context::TaskContext;
+use crate::physical_plan::CombinedRecordBatchStream;
 use crate::{
     error::Result,
     physical_plan::{expressions, metrics::BaselineMetrics},
@@ -52,6 +55,8 @@ pub struct UnionExec {
     metrics: ExecutionPlanMetricsSet,
     /// Schema of Union
     schema: SchemaRef,
+    /// Partition aware Union
+    partition_aware: bool,
 }
 
 impl UnionExec {
@@ -78,10 +83,24 @@ impl UnionExec {
             inputs[0].schema().metadata().clone(),
         ));
 
+        // If every input has the same Hash partition spec as first_input_partition,
+        // the UnionExec is partition aware.
+        //
+        // This might be too strict when the input partition specs are compatible but not exactly the same.
+        // For example, if one input has the partition spec Hash('a','b','c') and the
+        // other has the partition spec Hash('a'), it is safe to derive the output partitioning with the spec Hash('a','b','c').
+        let first_input_partition = inputs[0].output_partitioning();
+        let partition_aware = matches!(first_input_partition, Partitioning::Hash(_, _))
+            && inputs
+                .iter()
+                .map(|plan| plan.output_partitioning())
+                .all(|partition| partition == first_input_partition);
+
         UnionExec {
             inputs,
             metrics: ExecutionPlanMetricsSet::new(),
             schema,
+            partition_aware,
         }
     }
 
@@ -105,25 +124,52 @@ impl ExecutionPlan for UnionExec {
         self.inputs.clone()
     }
 
-    /// Output of the union is the combination of all output partitions of the inputs
     fn output_partitioning(&self) -> Partitioning {
-        // Sums all the output partitions
-        let num_partitions = self
-            .inputs
-            .iter()
-            .map(|plan| plan.output_partitioning().partition_count())
-            .sum();
-        // TODO: this loses partitioning info in case of same partitioning scheme (for example `Partitioning::Hash`)
-        // https://issues.apache.org/jira/browse/ARROW-11991
-        Partitioning::UnknownPartitioning(num_partitions)
+        if self.partition_aware {
+            self.inputs[0].output_partitioning()
+        } else {
+            // Output the combination of all output partitions of the inputs if the Union is not partition aware
+            let num_partitions = self
+                .inputs
+                .iter()
+                .map(|plan| plan.output_partitioning().partition_count())
+                .sum();
+
+            Partitioning::UnknownPartitioning(num_partitions)
+        }
     }
 
     fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
-        None
+        let first_input_ordering = self.inputs[0].output_ordering();
+        // If the Union is not partition aware and every input ordering spec is strictly equal to first_input_ordering,
+        // return first_input_ordering as the output_ordering.
+        //
+        // This might be too strict when the input orderings are compatible but not exactly the same.
+        // For example, if one input has the ordering spec SortExpr('a','b','c') and the other has the ordering
+        // spec SortExpr('a'), it is safe to derive the output ordering with the spec SortExpr('a').
+        if !self.partition_aware
+            && first_input_ordering.is_some()
+            && self
+                .inputs
+                .iter()
+                .map(|plan| plan.output_ordering())
+                .all(|ordering| {
+                    ordering.is_some()
+                        && sort_expr_list_eq_strict_order(
+                            ordering.unwrap(),
+                            first_input_ordering.unwrap(),
+                        )
+                })
+        {
+            first_input_ordering
+        } else {
+            None
+        }
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        false
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        // TODO calculate the common equivalence properties among all the inputs
+        vec![]
     }
 
     fn with_new_children(
@@ -145,19 +191,38 @@ impl ExecutionPlan for UnionExec {
         let elapsed_compute = baseline_metrics.elapsed_compute().clone();
         let _timer = elapsed_compute.timer(); // record on drop
 
-        // find partition to execute
-        for input in self.inputs.iter() {
-            // Calculate whether partition belongs to the current partition
-            if partition < input.output_partitioning().partition_count() {
-                let stream = input.execute(partition, context)?;
-                debug!("Found a Union partition to execute");
+        if self.partition_aware {
+            let mut input_stream_vec = vec![];
+            for input in self.inputs.iter() {
+                if partition < input.output_partitioning().partition_count() {
+                    input_stream_vec.push(input.execute(partition, context.clone())?);
+                } else {
+                    // This input has no such partition, so stop collecting streams
+                    break;
+                }
+            }
+            if input_stream_vec.len() == self.inputs.len() {
+                let stream = Box::pin(CombinedRecordBatchStream::new(
+                    self.schema(),
+                    input_stream_vec,
+                ));
                 return Ok(Box::pin(ObservedStream::new(stream, baseline_metrics)));
-            } else {
-                partition -= input.output_partitioning().partition_count();
+            }
+        } else {
+            // find partition to execute
+            for input in self.inputs.iter() {
+                // Calculate whether partition belongs to the current partition
+                if partition < input.output_partitioning().partition_count() {
+                    let stream = input.execute(partition, context)?;
+                    debug!("Found a Union partition to execute");
+                    return Ok(Box::pin(ObservedStream::new(stream, baseline_metrics)));
+                } else {
+                    partition -= input.output_partitioning().partition_count();
+                }
             }
         }
 
-        debug!("Error in Union: Partition {} not found", partition);
+        warn!("Error in Union: Partition {} not found", partition);
 
         Err(crate::error::DataFusionError::Execution(format!(
             "Partition {} not found in Union",
@@ -189,7 +254,7 @@ impl ExecutionPlan for UnionExec {
             .unwrap_or_default()
     }
 
-    fn benefits_from_input_partitioning(&self) -> bool {
+    fn prefer_parallel(&self) -> bool {
         false
     }
 }
diff --git a/datafusion/core/src/physical_plan/values.rs b/datafusion/core/src/physical_plan/values.rs
index 897936814ceea..56a8d2e494ac0 100644
--- a/datafusion/core/src/physical_plan/values.rs
+++ b/datafusion/core/src/physical_plan/values.rs
@@ -22,13 +22,14 @@ use super::{common, SendableRecordBatchStream, Statistics};
 use crate::error::{DataFusionError, Result};
 use crate::execution::context::TaskContext;
 use crate::physical_plan::{
-    memory::MemoryStream, ColumnarValue, DisplayFormatType, Distribution, ExecutionPlan,
-    Partitioning, PhysicalExpr,
+    memory::MemoryStream, ColumnarValue, DisplayFormatType, ExecutionPlan, Partitioning,
+    PhysicalExpr,
 };
 use crate::scalar::ScalarValue;
 use arrow::array::new_null_array;
 use arrow::datatypes::SchemaRef;
 use arrow::record_batch::RecordBatch;
+use datafusion_physical_expr::expressions::Column;
 use std::any::Any;
 use std::sync::Arc;
 
@@ -109,10 +110,6 @@ impl ExecutionPlan for ValuesExec {
         vec![]
     }
 
-    fn required_child_distribution(&self) -> Distribution {
-        Distribution::UnspecifiedDistribution
-    }
-
     /// Get the output partitioning of this plan
     fn output_partitioning(&self) -> Partitioning {
         Partitioning::UnknownPartitioning(1)
@@ -122,8 +119,8 @@ impl ExecutionPlan for ValuesExec {
         None
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        false
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        vec![]
     }
 
     fn with_new_children(
diff --git a/datafusion/core/src/physical_plan/windows/mod.rs b/datafusion/core/src/physical_plan/windows/mod.rs
index 26cb14fe33a96..1f995c589132c 100644
--- a/datafusion/core/src/physical_plan/windows/mod.rs
+++ b/datafusion/core/src/physical_plan/windows/mod.rs
@@ -210,6 +210,8 @@ mod tests {
             ],
             input,
             schema.clone(),
+            vec![],
+            None,
         )?);
 
         let result: Vec<RecordBatch> = collect(window_exec, task_ctx).await?;
@@ -255,6 +257,8 @@ mod tests {
             )?],
             blocking_exec,
             schema,
+            vec![],
+            None,
         )?);
 
         let fut = collect(window_agg_exec, task_ctx);
diff --git a/datafusion/core/src/physical_plan/windows/window_agg_exec.rs b/datafusion/core/src/physical_plan/windows/window_agg_exec.rs
index e9eac35a3d883..457ac165f886f 100644
--- a/datafusion/core/src/physical_plan/windows/window_agg_exec.rs
+++ b/datafusion/core/src/physical_plan/windows/window_agg_exec.rs
@@ -33,8 +33,11 @@ use arrow::{
     error::{ArrowError, Result as ArrowResult},
     record_batch::RecordBatch,
 };
+use datafusion_physical_expr::expressions::Column;
+use datafusion_physical_expr::PhysicalExpr;
 use futures::stream::Stream;
 use futures::{ready, StreamExt};
+use log::warn;
 use std::any::Any;
 use std::pin::Pin;
 use std::sync::Arc;
@@ -51,6 +54,10 @@ pub struct WindowAggExec {
     schema: SchemaRef,
     /// Schema before the window
     input_schema: SchemaRef,
+    /// Partition Keys
+    partition_keys: Vec<Arc<dyn PhysicalExpr>>,
+    /// Sort Keys
+    sort_keys: Option<Vec<PhysicalSortExpr>>,
     /// Execution metrics
     metrics: ExecutionPlanMetricsSet,
 }
@@ -61,6 +68,8 @@ impl WindowAggExec {
         window_expr: Vec<Arc<dyn WindowExpr>>,
         input: Arc<dyn ExecutionPlan>,
         input_schema: SchemaRef,
+        partition_keys: Vec<Arc<dyn PhysicalExpr>>,
+        sort_keys: Option<Vec<PhysicalSortExpr>>,
     ) -> Result<Self> {
         let schema = create_schema(&input_schema, &window_expr)?;
         let schema = Arc::new(schema);
@@ -69,6 +78,8 @@ impl WindowAggExec {
             window_expr,
             schema,
             input_schema,
+            partition_keys,
+            sort_keys,
             metrics: ExecutionPlanMetricsSet::new(),
         })
     }
@@ -119,22 +130,25 @@ impl ExecutionPlan for WindowAggExec {
         true
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        true
+    fn required_input_ordering(&self) -> Vec<Option<&[PhysicalSortExpr]>> {
+        let sort_keys = self.sort_keys.as_deref();
+        vec![sort_keys]
     }
 
-    fn required_child_distribution(&self) -> Distribution {
-        if self
-            .window_expr()
-            .iter()
-            .all(|expr| expr.partition_by().is_empty())
-        {
-            Distribution::SinglePartition
+    fn required_input_distribution(&self) -> Vec<Distribution> {
+        if self.partition_keys.is_empty() {
+            warn!("No partition defined for WindowAggExec!!!");
+            vec![Distribution::SinglePartition]
         } else {
-            Distribution::UnspecifiedDistribution
+            // TODO: support PartitionCollections if there are no common partition columns in the window_expr
+            vec![Distribution::HashPartitioned(self.partition_keys.clone())]
         }
     }
 
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        self.input.equivalence_properties()
+    }
+
     fn with_new_children(
         self: Arc<Self>,
         children: Vec<Arc<dyn ExecutionPlan>>,
@@ -143,6 +157,8 @@ impl ExecutionPlan for WindowAggExec {
             self.window_expr.clone(),
             children[0].clone(),
             self.input_schema.clone(),
+            self.partition_keys.clone(),
+            self.sort_keys.clone(),
         )?))
     }
 
diff --git a/datafusion/core/src/scheduler/pipeline/execution.rs b/datafusion/core/src/scheduler/pipeline/execution.rs
index 20e7c6e79a48c..58f535c1c54cf 100644
--- a/datafusion/core/src/scheduler/pipeline/execution.rs
+++ b/datafusion/core/src/scheduler/pipeline/execution.rs
@@ -235,20 +235,16 @@ impl ExecutionPlan for ProxyExecutionPlan {
         self.inner.output_ordering()
     }
 
-    fn required_child_distribution(&self) -> Distribution {
-        self.inner.required_child_distribution()
-    }
-
-    fn relies_on_input_order(&self) -> bool {
-        self.inner.relies_on_input_order()
+    fn required_input_distribution(&self) -> Vec<Distribution> {
+        self.inner.required_input_distribution()
     }
 
     fn maintains_input_order(&self) -> bool {
         self.inner.maintains_input_order()
     }
 
-    fn benefits_from_input_partitioning(&self) -> bool {
-        self.inner.benefits_from_input_partitioning()
+    fn prefer_parallel(&self) -> bool {
+        self.inner.prefer_parallel()
     }
 
     fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
diff --git a/datafusion/core/src/test/exec.rs b/datafusion/core/src/test/exec.rs
index 855bb3bbc11e7..f15afe947695e 100644
--- a/datafusion/core/src/test/exec.rs
+++ b/datafusion/core/src/test/exec.rs
@@ -30,6 +30,7 @@ use arrow::{
     error::{ArrowError, Result as ArrowResult},
     record_batch::RecordBatch,
 };
+use datafusion_physical_expr::expressions::Column;
 use futures::Stream;
 
 use crate::execution::context::TaskContext;
@@ -154,6 +155,10 @@ impl ExecutionPlan for MockExec {
         None
     }
 
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        vec![]
+    }
+
     fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
         unimplemented!()
     }
@@ -292,6 +297,10 @@ impl ExecutionPlan for BarrierExec {
         None
     }
 
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        vec![]
+    }
+
     fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
         unimplemented!()
     }
@@ -392,6 +401,10 @@ impl ExecutionPlan for ErrorExec {
         None
     }
 
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        vec![]
+    }
+
     fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
         unimplemented!()
     }
@@ -471,6 +484,10 @@ impl ExecutionPlan for StatisticsExec {
         None
     }
 
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        vec![]
+    }
+
     fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
         vec![]
     }
@@ -569,6 +586,10 @@ impl ExecutionPlan for BlockingExec {
         None
     }
 
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        vec![]
+    }
+
     fn with_new_children(
         self: Arc<Self>,
         _: Vec<Arc<dyn ExecutionPlan>>,
diff --git a/datafusion/core/tests/custom_sources.rs b/datafusion/core/tests/custom_sources.rs
index 88131313bc397..9ab61cd4aaa14 100644
--- a/datafusion/core/tests/custom_sources.rs
+++ b/datafusion/core/tests/custom_sources.rs
@@ -47,6 +47,7 @@ use std::task::{Context, Poll};
 
 use async_trait::async_trait;
 use datafusion::logical_plan::plan::Projection;
+use datafusion_physical_expr::expressions::Column;
 
 //// Custom source dataframe tests ////
 
@@ -119,6 +120,10 @@ impl ExecutionPlan for CustomExecutionPlan {
         None
     }
 
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        vec![]
+    }
+
     fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
         vec![]
     }
diff --git a/datafusion/core/tests/provider_filter_pushdown.rs b/datafusion/core/tests/provider_filter_pushdown.rs
index 117b4eb143e23..7d993c2c03100 100644
--- a/datafusion/core/tests/provider_filter_pushdown.rs
+++ b/datafusion/core/tests/provider_filter_pushdown.rs
@@ -74,6 +74,12 @@ impl ExecutionPlan for CustomPlan {
         None
     }
 
+    fn equivalence_properties(
+        &self,
+    ) -> Vec<Vec<datafusion_physical_expr::expressions::Column>> {
+        vec![]
+    }
+
     fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
         vec![]
     }
diff --git a/datafusion/core/tests/statistics.rs b/datafusion/core/tests/statistics.rs
index 95879ebaf679f..aabea42d343de 100644
--- a/datafusion/core/tests/statistics.rs
+++ b/datafusion/core/tests/statistics.rs
@@ -35,6 +35,7 @@ use datafusion::{
 
 use async_trait::async_trait;
 use datafusion::execution::context::{SessionState, TaskContext};
+use datafusion_physical_expr::expressions::Column;
 
 /// This is a testing structure for statistics
 /// It will act both as a table provider and execution plan
@@ -128,6 +129,10 @@ impl ExecutionPlan for StatisticsValidation {
         None
     }
 
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        vec![] // this plan reports no column equivalences
+    }
+
     fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
         vec![]
     }
diff --git a/datafusion/core/tests/user_defined_plan.rs b/datafusion/core/tests/user_defined_plan.rs
index c577e48e78000..5aaef1ed2241d 100644
--- a/datafusion/core/tests/user_defined_plan.rs
+++ b/datafusion/core/tests/user_defined_plan.rs
@@ -91,6 +91,7 @@ use datafusion::execution::runtime_env::RuntimeEnv;
 use datafusion::logical_plan::plan::{Extension, Sort};
 use datafusion::logical_plan::{DFSchemaRef, Limit};
 use datafusion::optimizer::optimizer::OptimizerConfig;
+use datafusion_physical_expr::expressions::Column;
 
 /// Execute the specified sql and return the resulting record batches
 /// pretty printed as a String.
@@ -442,12 +443,12 @@ impl ExecutionPlan for TopKExec {
         None
     }
 
-    fn relies_on_input_order(&self) -> bool {
-        false
+    fn required_input_distribution(&self) -> Vec<Distribution> {
+        vec![Distribution::SinglePartition] // input must be a single partition
     }
 
-    fn required_child_distribution(&self) -> Distribution {
-        Distribution::SinglePartition
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        vec![] // this plan reports no column equivalences
     }
 
     fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml
index ba618acb64bfb..7deff3738d255 100644
--- a/datafusion/physical-expr/Cargo.toml
+++ b/datafusion/physical-expr/Cargo.toml
@@ -56,3 +56,4 @@ rand = "0.8"
 regex = { version = "^1.4.3", optional = true }
 sha2 = { version = "^0.10.1", optional = true }
 unicode-segmentation = { version = "^1.7.1", optional = true }
+itertools = "0.10.3"
diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs
index 02bf0e5bdab39..4966a7dd090d2 100644
--- a/datafusion/physical-expr/src/expressions/binary.rs
+++ b/datafusion/physical-expr/src/expressions/binary.rs
@@ -73,6 +73,7 @@ use kernels_arrow::{
 use arrow::datatypes::{DataType, Schema, TimeUnit};
 use arrow::record_batch::RecordBatch;
 
+use crate::physical_expr::down_cast_any_ref;
 use crate::PhysicalExpr;
 use datafusion_common::ScalarValue;
 use datafusion_common::{DataFusionError, Result};
@@ -640,6 +641,30 @@ impl PhysicalExpr for BinaryExpr {
         self.evaluate_with_resolved_args(left, &left_data_type, right, &right_data_type)
             .map(|a| ColumnarValue::Array(a))
     }
+
+    fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
+        vec![Arc::clone(&self.left), Arc::clone(&self.right)] // [left, right]
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn PhysicalExpr>>,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        // `children` follows the `[left, right]` layout of `children()`.
+        let left = Arc::clone(&children[0]);
+        let right = Arc::clone(&children[1]);
+        let rebuilt = BinaryExpr::new(left, self.op, right);
+        Ok(Arc::new(rebuilt))
+    }
+}
+
+impl PartialEq<dyn Any> for BinaryExpr {
+    fn eq(&self, other: &dyn Any) -> bool {
+        down_cast_any_ref(other).downcast_ref::<Self>().map_or(false, |x| {
+            // operands and operator must all match
+            self.left.eq(&x.left) && self.op == x.op && self.right.eq(&x.right)
+        })
+    }
 }
 
 /// unwrap underlying (non dictionary) value, if any, to pass to a scalar kernel
diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs
index cf4f7defe7f36..4f7b3dc989ee4 100644
--- a/datafusion/physical-expr/src/expressions/case.rs
+++ b/datafusion/physical-expr/src/expressions/case.rs
@@ -27,6 +27,9 @@ use arrow::record_batch::RecordBatch;
 use datafusion_common::{DataFusionError, Result};
 use datafusion_expr::binary_rule::comparison_coercion;
 use datafusion_expr::ColumnarValue;
+use itertools::Itertools;
+
+use crate::expressions::no_op::NoOp;
 
 type WhenThen = (Arc<dyn PhysicalExpr>, Arc<dyn PhysicalExpr>);
 
@@ -287,6 +290,95 @@ impl PhysicalExpr for CaseExpr {
             self.case_when_no_expr(batch)
         }
     }
+
+    /// Returns the child expressions in a fixed layout:
+    /// `[base expr, when_1, then_1, ..., when_n, then_n, else expr]`.
+    ///
+    /// A missing base or `ELSE` expression is represented by a [`NoOp`]
+    /// placeholder, so the layout never depends on which parts are present.
+    fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
+        let mut children: Vec<Arc<dyn PhysicalExpr>> =
+            Vec::with_capacity(2 * self.when_then_expr.len() + 2);
+        match &self.expr {
+            Some(expr) => children.push(expr.clone()),
+            None => children.push(Arc::new(NoOp::new())),
+        }
+        for (when, then) in &self.when_then_expr {
+            children.push(when.clone());
+            children.push(then.clone());
+        }
+        match &self.else_expr {
+            Some(expr) => children.push(expr.clone()),
+            None => children.push(Arc::new(NoOp::new())),
+        }
+        children
+    }
+
+    /// Rebuilds the `CASE` expression from `children`, which must follow the
+    /// layout produced by `children()`.
+    ///
+    /// For a physical `CaseExpr` the number of children may not change; an
+    /// `Internal` error is returned if it does.
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn PhysicalExpr>>,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        // One slot each for the base and `ELSE` expressions, plus a
+        // `(when, then)` pair per branch.
+        if children.len() != 2 * self.when_then_expr.len() + 2 {
+            return Err(DataFusionError::Internal(
+                "CaseExpr: Wrong number of children".to_string(),
+            ));
+        }
+        // A `NoOp` placeholder stands for an absent optional expression.
+        let unwrap_placeholder = |e: &Arc<dyn PhysicalExpr>| {
+            if e.as_any().is::<NoOp>() {
+                None
+            } else {
+                Some(e.clone())
+            }
+        };
+        let last = children.len() - 1;
+        let expr = unwrap_placeholder(&children[0]);
+        let else_expr = unwrap_placeholder(&children[last]);
+        let when_then_expr: Vec<WhenThen> =
+            children[1..last].iter().cloned().tuples().collect();
+        Ok(Arc::new(CaseExpr::try_new(
+            expr,
+            when_then_expr,
+            else_expr,
+        )?))
+    }
+}
+
+impl PartialEq<dyn Any> for CaseExpr {
+    /// Two `CASE` expressions are equal when their base expressions, all
+    /// `(when, then)` branches (in order) and their `ELSE` expressions are equal.
+    fn eq(&self, other: &dyn Any) -> bool {
+        crate::physical_expr::down_cast_any_ref(other)
+            .downcast_ref::<Self>()
+            .map(|x| {
+                let expr_eq = match (&self.expr, &x.expr) {
+                    (Some(expr1), Some(expr2)) => expr1.eq(expr2),
+                    (None, None) => true,
+                    _ => false,
+                };
+                let else_expr_eq = match (&self.else_expr, &x.else_expr) {
+                    (Some(expr1), Some(expr2)) => expr1.eq(expr2),
+                    (None, None) => true,
+                    _ => false,
+                };
+                expr_eq
+                    && else_expr_eq
+                    && self.when_then_expr.len() == x.when_then_expr.len()
+                    && self.when_then_expr.iter().zip(x.when_then_expr.iter()).all(
+                        |((when1, then1), (when2, then2))| {
+                            when1.eq(when2) && then1.eq(then2)
+                        },
+                    )
+            })
+            .unwrap_or(false)
+    }
 }
 
 /// Create a CASE expression
diff --git a/datafusion/physical-expr/src/expressions/cast.rs b/datafusion/physical-expr/src/expressions/cast.rs
index 5affc857c6721..61972d5370298 100644
--- a/datafusion/physical-expr/src/expressions/cast.rs
+++ b/datafusion/physical-expr/src/expressions/cast.rs
@@ -19,6 +19,7 @@ use std::any::Any;
 use std::fmt;
 use std::sync::Arc;
 
+use crate::physical_expr::down_cast_any_ref;
 use crate::PhysicalExpr;
 use arrow::compute;
 use arrow::compute::kernels;
@@ -93,6 +94,36 @@ impl PhysicalExpr for CastExpr {
         let value = self.expr.evaluate(batch)?;
         cast_column(&value, &self.cast_type, &self.cast_options)
     }
+
+    fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
+        vec![Arc::clone(&self.expr)] // the expression being cast
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn PhysicalExpr>>,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        // The options are rebuilt field by field rather than cloned.
+        let cast_options = CastOptions {
+            safe: self.cast_options.safe,
+        };
+        let input = Arc::clone(&children[0]);
+        let rebuilt = CastExpr::new(input, self.cast_type.clone(), cast_options);
+        Ok(Arc::new(rebuilt))
+    }
+}
+
+impl PartialEq<dyn Any> for CastExpr {
+    fn eq(&self, other: &dyn Any) -> bool {
+        match down_cast_any_ref(other).downcast_ref::<Self>() {
+            Some(x) => {
+                self.expr.eq(&x.expr)
+                    && self.cast_type == x.cast_type
+                    && self.cast_options.safe == x.cast_options.safe
+            }
+            None => false, // a different expression type is never equal
+        }
+    }
 }
 
 /// Internal cast function for casting ColumnarValue -> ColumnarValue for cast_type
diff --git a/datafusion/physical-expr/src/expressions/column.rs b/datafusion/physical-expr/src/expressions/column.rs
index 63f8c405308e0..e6f5c6dd1124b 100644
--- a/datafusion/physical-expr/src/expressions/column.rs
+++ b/datafusion/physical-expr/src/expressions/column.rs
@@ -17,6 +17,7 @@
 
 //! Column expression
 
+use std::any::Any;
 use std::sync::Arc;
 
 use arrow::{
@@ -24,6 +25,7 @@ use arrow::{
     record_batch::RecordBatch,
 };
 
+use crate::physical_expr::down_cast_any_ref;
 use crate::PhysicalExpr;
 use datafusion_common::{DataFusionError, Result};
 use datafusion_expr::ColumnarValue;
@@ -89,6 +91,26 @@ impl PhysicalExpr for Column {
         self.bounds_check(batch.schema().as_ref())?;
         Ok(ColumnarValue::Array(batch.column(self.index).clone()))
     }
+
+    fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
+        Vec::new() // a column reference is a leaf
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        _children: Vec<Arc<dyn PhysicalExpr>>,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        Ok(self) // no children to replace
+    }
+}
+
+impl PartialEq<dyn Any> for Column {
+    fn eq(&self, other: &dyn Any) -> bool {
+        // Defer to `Column`'s own equality once the type matches.
+        down_cast_any_ref(other)
+            .downcast_ref::<Self>()
+            .map_or(false, |that| self == that)
+    }
 }
 
 impl Column {
diff --git a/datafusion/physical-expr/src/expressions/datetime.rs b/datafusion/physical-expr/src/expressions/datetime.rs
index fa021f61a940f..3c7096c7256cd 100644
--- a/datafusion/physical-expr/src/expressions/datetime.rs
+++ b/datafusion/physical-expr/src/expressions/datetime.rs
@@ -16,6 +16,7 @@
 // under the License.
 
 use crate::expressions::delta::shift_months;
+use crate::physical_expr::down_cast_any_ref;
 use crate::PhysicalExpr;
 use arrow::array::{
     Array, ArrayRef, Date32Array, Date64Array, TimestampMicrosecondArray,
@@ -42,6 +43,7 @@ pub struct DateTimeIntervalExpr {
     lhs: Arc<dyn PhysicalExpr>,
     op: Operator,
     rhs: Arc<dyn PhysicalExpr>,
+    input_schema: Schema,
 }
 
 impl DateTimeIntervalExpr {
@@ -56,7 +58,12 @@ impl DateTimeIntervalExpr {
             DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) => {
                 match rhs.data_type(input_schema)? {
                     DataType::Interval(_) => match &op {
-                        Operator::Plus | Operator::Minus => Ok(Self { lhs, op, rhs }),
+                        Operator::Plus | Operator::Minus => Ok(Self {
+                            lhs,
+                            op,
+                            rhs,
+                            input_schema: input_schema.clone(),
+                        }),
                         _ => Err(DataFusionError::Execution(format!(
                             "Invalid operator '{}' for DateIntervalExpr",
                             op
@@ -140,6 +147,31 @@ impl PhysicalExpr for DateTimeIntervalExpr {
             ColumnarValue::Array(array) => evaluate_array(array, sign, intervals),
         }
     }
+
+    fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
+        vec![Arc::clone(&self.lhs), Arc::clone(&self.rhs)] // [lhs, rhs]
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn PhysicalExpr>>,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        // Go through `try_new` with the schema captured at construction so
+        // the replacement operands are validated again.
+        let lhs = Arc::clone(&children[0]);
+        let rhs = Arc::clone(&children[1]);
+        let rebuilt = Self::try_new(lhs, self.op, rhs, &self.input_schema)?;
+        Ok(Arc::new(rebuilt))
+    }
+}
+
+impl PartialEq<dyn Any> for DateTimeIntervalExpr {
+    fn eq(&self, other: &dyn Any) -> bool {
+        down_cast_any_ref(other).downcast_ref::<Self>().map_or(false, |x| {
+            // operands and operator must all match
+            self.lhs.eq(&x.lhs) && self.op == x.op && self.rhs.eq(&x.rhs)
+        })
+    }
 }
 
 pub fn evaluate_array(
diff --git a/datafusion/physical-expr/src/expressions/get_indexed_field.rs b/datafusion/physical-expr/src/expressions/get_indexed_field.rs
index 5d9b1594d4383..ff10c06e2206f 100644
--- a/datafusion/physical-expr/src/expressions/get_indexed_field.rs
+++ b/datafusion/physical-expr/src/expressions/get_indexed_field.rs
@@ -22,6 +22,7 @@ use arrow::array::Array;
 use arrow::array::{ListArray, StructArray};
 use arrow::compute::concat;
 
+use crate::physical_expr::down_cast_any_ref;
 use arrow::{
     datatypes::{DataType, Schema},
     record_batch::RecordBatch,
@@ -132,6 +133,29 @@ impl PhysicalExpr for GetIndexedFieldExpr {
             (dt, key) => Err(DataFusionError::Execution(format!("get indexed field is only possible on lists with int64 indexes or struct with utf8 indexes. Tried {:?} with {:?} index", dt, key))),
         }
     }
+
+    fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
+        vec![Arc::clone(&self.arg)] // the expression being indexed
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn PhysicalExpr>>,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        // Only the indexed expression changes; the key is carried over.
+        let arg = Arc::clone(&children[0]);
+        let rebuilt = GetIndexedFieldExpr::new(arg, self.key.clone());
+        Ok(Arc::new(rebuilt))
+    }
+}
+
+impl PartialEq<dyn Any> for GetIndexedFieldExpr {
+    fn eq(&self, other: &dyn Any) -> bool {
+        // Equal only to a `GetIndexedFieldExpr` with equal input and key.
+        down_cast_any_ref(other)
+            .downcast_ref::<Self>()
+            .map_or(false, |x| self.arg.eq(&x.arg) && self.key == x.key)
+    }
 }
 
 #[cfg(test)]
diff --git a/datafusion/physical-expr/src/expressions/in_list.rs b/datafusion/physical-expr/src/expressions/in_list.rs
index ae49b4cec4ace..3ab68b4a9feae 100644
--- a/datafusion/physical-expr/src/expressions/in_list.rs
+++ b/datafusion/physical-expr/src/expressions/in_list.rs
@@ -19,6 +19,7 @@
 
 use std::any::Any;
 use std::collections::HashSet;
+use std::fmt::Debug;
 use std::sync::Arc;
 
 use arrow::array::GenericStringArray;
@@ -33,6 +34,7 @@ use arrow::{
     record_batch::RecordBatch,
 };
 
+use crate::physical_expr::{down_cast_any_ref, expr_list_eq_any_order};
 use crate::PhysicalExpr;
 use arrow::array::*;
 use arrow::datatypes::TimeUnit;
@@ -52,16 +54,27 @@ use datafusion_expr::ColumnarValue;
 static OPTIMIZER_INSET_THRESHOLD: usize = 30;
 
 /// InList
-#[derive(Debug)]
 pub struct InListExpr {
     expr: Arc<dyn PhysicalExpr>,
     list: Vec<Arc<dyn PhysicalExpr>>,
     negated: bool,
     set: Option<InSet>,
+    input_schema: Schema,
+}
+
+impl Debug for InListExpr {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        f.debug_struct("InListExpr") // `input_schema` is not part of the output
+            .field("expr", &self.expr)
+            .field("list", &self.list)
+            .field("negated", &self.negated)
+            .field("set", &self.set)
+            .finish()
+    }
 }
 
 /// InSet
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Eq)]
 pub struct InSet {
     // TODO: optimization: In the `IN` or `NOT IN` we don't need to consider the NULL value
     // The data type is same, we can use  set: HashSet<T>
@@ -386,6 +399,7 @@ impl InListExpr {
                     set: Some(InSet::new(set)),
                     list,
                     negated,
+                    input_schema: schema.clone(),
                 };
             }
         }
@@ -394,6 +408,7 @@ impl InListExpr {
             list,
             negated,
             set: None,
+            input_schema: schema.clone(),
         }
     }
 
@@ -899,6 +914,39 @@ impl PhysicalExpr for InListExpr {
             }
         }
     }
+
+    fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
+        // Layout: the tested expression followed by every list item;
+        // `with_new_children` relies on this order.
+        let operands = std::iter::once(&self.expr).chain(self.list.iter());
+        operands.cloned().collect()
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn PhysicalExpr>>,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        // Split `children` back into the tested expression and the list.
+        let expr = Arc::clone(&children[0]);
+        let list = children[1..].to_vec();
+        // `in_list` is given the retained negation flag and the input
+        // schema captured at construction.
+        in_list(expr, list, &self.negated, &self.input_schema)
+    }
+}
+
+impl PartialEq<dyn Any> for InListExpr {
+    fn eq(&self, other: &dyn Any) -> bool {
+        match down_cast_any_ref(other).downcast_ref::<Self>() {
+            Some(x) => {
+                self.expr.eq(&x.expr)
+                    && expr_list_eq_any_order(&self.list, &x.list)
+                    && self.negated == x.negated
+                    && self.set == x.set
+            }
+            None => false, // a different expression type is never equal
+        }
+    }
 }
 
 /// Creates a unary expression InList
diff --git a/datafusion/physical-expr/src/expressions/is_not_null.rs b/datafusion/physical-expr/src/expressions/is_not_null.rs
index 6b614f3d98ca1..4e24159acd13b 100644
--- a/datafusion/physical-expr/src/expressions/is_not_null.rs
+++ b/datafusion/physical-expr/src/expressions/is_not_null.rs
@@ -19,6 +19,7 @@
 
 use std::{any::Any, sync::Arc};
 
+use crate::physical_expr::down_cast_any_ref;
 use crate::PhysicalExpr;
 use arrow::compute;
 use arrow::{
@@ -79,6 +80,26 @@ impl PhysicalExpr for IsNotNullExpr {
             )),
         }
     }
+
+    fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
+        vec![Arc::clone(&self.arg)] // the single operand
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn PhysicalExpr>>,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        Ok(Arc::new(Self::new(Arc::clone(&children[0]))))
+    }
+}
+
+impl PartialEq<dyn Any> for IsNotNullExpr {
+    fn eq(&self, other: &dyn Any) -> bool {
+        // Equal only to another `IsNotNullExpr` over an equal operand.
+        down_cast_any_ref(other)
+            .downcast_ref::<Self>()
+            .map_or(false, |that| self.arg.eq(&that.arg))
+    }
 }
 
 /// Create an IS NOT NULL expression
diff --git a/datafusion/physical-expr/src/expressions/is_null.rs b/datafusion/physical-expr/src/expressions/is_null.rs
index e5dbfbdc74819..6ee11820c7f09 100644
--- a/datafusion/physical-expr/src/expressions/is_null.rs
+++ b/datafusion/physical-expr/src/expressions/is_null.rs
@@ -25,6 +25,7 @@ use arrow::{
     record_batch::RecordBatch,
 };
 
+use crate::physical_expr::down_cast_any_ref;
 use crate::PhysicalExpr;
 use datafusion_common::Result;
 use datafusion_common::ScalarValue;
@@ -80,6 +81,26 @@ impl PhysicalExpr for IsNullExpr {
             )),
         }
     }
+
+    fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
+        vec![Arc::clone(&self.arg)] // the single operand
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn PhysicalExpr>>,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        Ok(Arc::new(Self::new(Arc::clone(&children[0]))))
+    }
+}
+
+impl PartialEq<dyn Any> for IsNullExpr {
+    fn eq(&self, other: &dyn Any) -> bool {
+        // Equal only to another `IsNullExpr` over an equal operand.
+        down_cast_any_ref(other)
+            .downcast_ref::<Self>()
+            .map_or(false, |that| self.arg.eq(&that.arg))
+    }
 }
 
 /// Create an IS NULL expression
diff --git a/datafusion/physical-expr/src/expressions/literal.rs b/datafusion/physical-expr/src/expressions/literal.rs
index e711f57c0e3bf..3c09cd5a863d6 100644
--- a/datafusion/physical-expr/src/expressions/literal.rs
+++ b/datafusion/physical-expr/src/expressions/literal.rs
@@ -25,13 +25,14 @@ use arrow::{
     record_batch::RecordBatch,
 };
 
+use crate::physical_expr::down_cast_any_ref;
 use crate::PhysicalExpr;
 use datafusion_common::Result;
 use datafusion_common::ScalarValue;
 use datafusion_expr::{ColumnarValue, Expr};
 
 /// Represents a literal value
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Eq)]
 pub struct Literal {
     value: ScalarValue,
 }
@@ -71,6 +72,26 @@ impl PhysicalExpr for Literal {
     fn evaluate(&self, _batch: &RecordBatch) -> Result<ColumnarValue> {
         Ok(ColumnarValue::Scalar(self.value.clone()))
     }
+
+    fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
+        Vec::new() // a literal is a leaf
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        _children: Vec<Arc<dyn PhysicalExpr>>,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        Ok(self) // no children to replace
+    }
+}
+
+impl PartialEq<dyn Any> for Literal {
+    fn eq(&self, other: &dyn Any) -> bool {
+        // Defer to the derived `Literal` equality once the type matches.
+        down_cast_any_ref(other)
+            .downcast_ref::<Self>()
+            .map_or(false, |that| self == that)
+    }
 }
 
 /// Create a literal expression
diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs
index 00bf6aafade46..29ffe0220d93e 100644
--- a/datafusion/physical-expr/src/expressions/mod.rs
+++ b/datafusion/physical-expr/src/expressions/mod.rs
@@ -30,6 +30,7 @@ mod is_not_null;
 mod is_null;
 mod literal;
 mod negative;
+mod no_op;
 mod not;
 mod nullif;
 mod try_cast;
diff --git a/datafusion/physical-expr/src/expressions/negative.rs b/datafusion/physical-expr/src/expressions/negative.rs
index 0307f7184371a..15897e108eb4f 100644
--- a/datafusion/physical-expr/src/expressions/negative.rs
+++ b/datafusion/physical-expr/src/expressions/negative.rs
@@ -28,6 +28,7 @@ use arrow::{
     record_batch::RecordBatch,
 };
 
+use crate::physical_expr::down_cast_any_ref;
 use crate::PhysicalExpr;
 use datafusion_common::{DataFusionError, Result};
 use datafusion_expr::{binary_rule::is_signed_numeric, ColumnarValue};
@@ -107,6 +108,26 @@ impl PhysicalExpr for NegativeExpr {
             }
         }
     }
+
+    fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
+        vec![Arc::clone(&self.arg)] // the single operand
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn PhysicalExpr>>,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        Ok(Arc::new(Self::new(Arc::clone(&children[0]))))
+    }
+}
+
+impl PartialEq<dyn Any> for NegativeExpr {
+    fn eq(&self, other: &dyn Any) -> bool {
+        // Equal only to another `NegativeExpr` over an equal operand.
+        down_cast_any_ref(other)
+            .downcast_ref::<Self>()
+            .map_or(false, |that| self.arg.eq(&that.arg))
+    }
 }
 
 /// Creates a unary expression NEGATIVE
diff --git a/datafusion/physical-expr/src/expressions/no_op.rs b/datafusion/physical-expr/src/expressions/no_op.rs
new file mode 100644
index 0000000000000..2c916ed321c99
--- /dev/null
+++ b/datafusion/physical-expr/src/expressions/no_op.rs
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Placeholder (`NoOp`) expression for physical operations
+
+use std::any::Any;
+use std::sync::Arc;
+
+use arrow::{
+    datatypes::{DataType, Schema},
+    record_batch::RecordBatch,
+};
+
+use crate::physical_expr::down_cast_any_ref;
+use crate::PhysicalExpr;
+use datafusion_common::{DataFusionError, Result};
+use datafusion_expr::ColumnarValue;
+
+/// A placeholder expression that cannot be evaluated.
+///
+/// It stands in for an absent optional child (for example a missing `ELSE`
+/// branch of a `CASE` expression) so that a child list keeps a fixed layout.
+#[derive(Debug, Default, PartialEq, Eq)]
+pub struct NoOp {}
+
+impl NoOp {
+    /// Create a NoOp expression
+    pub fn new() -> Self {
+        Self {}
+    }
+}
+
+impl std::fmt::Display for NoOp {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "NoOp")
+    }
+}
+
+impl PhysicalExpr for NoOp {
+    /// Return a reference to Any that can be used for downcasting
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// Always reports the `Null` data type.
+    fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
+        Ok(DataType::Null)
+    }
+
+    /// Always reports the expression as nullable.
+    fn nullable(&self, _input_schema: &Schema) -> Result<bool> {
+        Ok(true)
+    }
+
+    /// A `NoOp` has no value: evaluating it returns an `Internal` error.
+    fn evaluate(&self, _batch: &RecordBatch) -> Result<ColumnarValue> {
+        Err(DataFusionError::Internal(
+            "NoOp::evaluate() should not be called".to_string(),
+        ))
+    }
+
+    /// A `NoOp` is a leaf expression and has no children.
+    fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
+        vec![]
+    }
+
+    /// There are no children to replace, so `self` is returned unchanged.
+    fn with_new_children(
+        self: Arc<Self>,
+        _children: Vec<Arc<dyn PhysicalExpr>>,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        Ok(self)
+    }
+}
+
+impl PartialEq<dyn Any> for NoOp {
+    /// Equal to any other `NoOp`; never equal to another expression type.
+    fn eq(&self, other: &dyn Any) -> bool {
+        down_cast_any_ref(other)
+            .downcast_ref::<Self>()
+            .map(|x| self == x)
+            .unwrap_or(false)
+    }
+}
diff --git a/datafusion/physical-expr/src/expressions/not.rs b/datafusion/physical-expr/src/expressions/not.rs
index a7fba60ec362a..00f1670af7dc3 100644
--- a/datafusion/physical-expr/src/expressions/not.rs
+++ b/datafusion/physical-expr/src/expressions/not.rs
@@ -21,6 +21,7 @@ use std::any::Any;
 use std::fmt;
 use std::sync::Arc;
 
+use crate::physical_expr::down_cast_any_ref;
 use crate::PhysicalExpr;
 use arrow::array::BooleanArray;
 use arrow::datatypes::{DataType, Schema};
@@ -103,6 +104,26 @@ impl PhysicalExpr for NotExpr {
             }
         }
     }
+
+    fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
+        vec![Arc::clone(&self.arg)] // the single operand
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn PhysicalExpr>>,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        Ok(Arc::new(Self::new(Arc::clone(&children[0]))))
+    }
+}
+
+impl PartialEq<dyn Any> for NotExpr {
+    fn eq(&self, other: &dyn Any) -> bool {
+        // Equal only to another `NotExpr` over an equal operand.
+        down_cast_any_ref(other)
+            .downcast_ref::<Self>()
+            .map_or(false, |that| self.arg.eq(&that.arg))
+    }
 }
 
 /// Creates a unary expression NOT
diff --git a/datafusion/physical-expr/src/expressions/try_cast.rs b/datafusion/physical-expr/src/expressions/try_cast.rs
index 154ec85e58ecd..77e0d52b37b49 100644
--- a/datafusion/physical-expr/src/expressions/try_cast.rs
+++ b/datafusion/physical-expr/src/expressions/try_cast.rs
@@ -19,6 +19,7 @@ use std::any::Any;
 use std::fmt;
 use std::sync::Arc;
 
+use crate::physical_expr::down_cast_any_ref;
 use crate::PhysicalExpr;
 use arrow::compute;
 use arrow::compute::kernels;
@@ -90,6 +91,29 @@ impl PhysicalExpr for TryCastExpr {
             }
         }
     }
+
+    fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
+        vec![Arc::clone(&self.expr)] // the expression being cast
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn PhysicalExpr>>,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        // Only the input expression changes; the target type is carried over.
+        let input = Arc::clone(&children[0]);
+        let rebuilt = TryCastExpr::new(input, self.cast_type.clone());
+        Ok(Arc::new(rebuilt))
+    }
+}
+
+impl PartialEq<dyn Any> for TryCastExpr {
+    fn eq(&self, other: &dyn Any) -> bool {
+        // Equal only to a `TryCastExpr` with equal input and target type.
+        down_cast_any_ref(other)
+            .downcast_ref::<Self>()
+            .map_or(false, |x| self.expr.eq(&x.expr) && self.cast_type == x.cast_type)
+    }
 }
 
 /// Return a PhysicalExpression representing `expr` casted to
diff --git a/datafusion/physical-expr/src/lib.rs b/datafusion/physical-expr/src/lib.rs
index 7511fb2624cb0..861911bb8e880 100644
--- a/datafusion/physical-expr/src/lib.rs
+++ b/datafusion/physical-expr/src/lib.rs
@@ -37,12 +37,23 @@ pub mod type_coercion;
 pub mod udf;
 #[cfg(feature = "unicode_expressions")]
 pub mod unicode_expressions;
+mod utils;
 pub mod var_provider;
 pub mod window;
 
 // reexport this to maintain compatibility with anything that used from_slice previously
 pub use aggregate::AggregateExpr;
 pub use datafusion_common::from_slice;
+pub use physical_expr::combine_equivalence_properties;
+pub use physical_expr::expr_list_eq_any_order;
+pub use physical_expr::merge_equivalence_properties_with_alias;
+pub use physical_expr::normalize_expr_with_equivalence_properties;
+pub use physical_expr::normalize_out_expr_with_alias_schema;
+pub use physical_expr::normalize_sort_expr_with_equivalence_properties;
+pub use physical_expr::remove_equivalence_properties;
+pub use physical_expr::sort_expr_list_eq_strict_order;
+pub use physical_expr::split_predicate;
+pub use physical_expr::truncate_equivalence_properties_not_in_schema;
 pub use physical_expr::PhysicalExpr;
 pub use planner::create_physical_expr;
 pub use scalar_function::ScalarFunctionExpr;
diff --git a/datafusion/physical-expr/src/physical_expr.rs b/datafusion/physical-expr/src/physical_expr.rs
index 62f5541273ef7..caaec406ef74b 100644
--- a/datafusion/physical-expr/src/physical_expr.rs
+++ b/datafusion/physical-expr/src/physical_expr.rs
@@ -15,22 +15,27 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow::datatypes::{DataType, Schema};
+use arrow::datatypes::{DataType, Schema, SchemaRef};
 
 use arrow::record_batch::RecordBatch;
 
 use datafusion_common::Result;
 
-use datafusion_expr::ColumnarValue;
+use datafusion_expr::{ColumnarValue, Operator};
 use std::fmt::{Debug, Display};
 
+use crate::expressions::{BinaryExpr, Column};
+use crate::utils::transform;
+use crate::PhysicalSortExpr;
 use arrow::array::{make_array, Array, ArrayRef, BooleanArray, MutableArrayData};
 use arrow::compute::{and_kleene, filter_record_batch, is_not_null, SlicesIterator};
 use std::any::Any;
+use std::collections::HashMap;
+use std::sync::Arc;
 
 /// Expression that can be evaluated against a RecordBatch
 /// A Physical expression knows its type, nullability and how to evaluate itself.
-pub trait PhysicalExpr: Send + Sync + Display + Debug {
+pub trait PhysicalExpr: Send + Sync + Display + Debug + PartialEq<dyn Any> {
     /// Returns the physical expression as [`Any`](std::any::Any) so that it can be
     /// downcast to a specific implementation.
     fn as_any(&self) -> &dyn Any;
@@ -61,6 +66,15 @@ pub trait PhysicalExpr: Send + Sync + Display + Debug {
             Ok(tmp_result)
         }
     }
+
+    /// Get the list of child PhysicalExprs that provide the input for this expression.
+    fn children(&self) -> Vec<Arc<dyn PhysicalExpr>>;
+
+    /// Returns a new PhysicalExpr in which all children are replaced by the given exprs.
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn PhysicalExpr>>,
+    ) -> Result<Arc<dyn PhysicalExpr>>;
 }
 
 /// Scatter `truthy` array by boolean mask. When the mask evaluates `true`, next values of `truthy`
@@ -106,6 +120,235 @@ fn scatter(mask: &BooleanArray, truthy: &dyn Array) -> Result<ArrayRef> {
     Ok(make_array(data))
 }
 
+/// Checks whether the two expr lists are equal regardless of the order of their elements.
+/// For example two InListExpr can be considered to be equal no matter the order:
+///
+/// In('a','b','c') == In('c','b','a')
+///
+/// Another example is for Partition Exprs, we can safely consider the below two exprs are equal:
+///
+/// HashPartitioned('a','b','c') == HashPartitioned('c','b','a')
+pub fn expr_list_eq_any_order(
+    list1: &[Arc<dyn PhysicalExpr>],
+    list2: &[Arc<dyn PhysicalExpr>],
+) -> bool {
+    list1.len() == list2.len()
+        && list1.iter().all(|e1| list2.iter().any(|e2| e2.eq(e1)))
+        && list2.iter().all(|e2| list1.iter().any(|e1| e1.eq(e2)))
+}
+
+/// Strictly compare the two sort expr lists in the given order.
+///
+/// For Physical Sort Exprs, the order matters:
+///
+/// SortExpr('a','b','c') != SortExpr('c','b','a')
+pub fn sort_expr_list_eq_strict_order(
+    list1: &[PhysicalSortExpr],
+    list2: &[PhysicalSortExpr],
+) -> bool {
+    list1.len() == list2.len() && list1.iter().zip(list2.iter()).all(|(e1, e2)| e1.eq(e2))
+}
+
+/// Assume the predicate is in the form of CNF, split the predicate to a Vec of PhysicalExprs.
+///
+/// For example, split "a1 = a2 AND b1 <= b2 AND c1 != c2" into ["a1 = a2", "b1 <= b2", "c1 != c2"].
+/// Any predicate that is not an `AND` BinaryExpr is returned unsplit as a single-element Vec.
+pub fn split_predicate(predicate: &Arc<dyn PhysicalExpr>) -> Vec<&Arc<dyn PhysicalExpr>> {
+    match predicate.as_any().downcast_ref::<BinaryExpr>() {
+        Some(binary) => match binary.op() {
+            Operator::And => {
+                let mut vec1 = split_predicate(binary.left());
+                let vec2 = split_predicate(binary.right());
+                vec1.extend(vec2);
+                vec1
+            }
+            _ => vec![predicate],
+        },
+        None => vec![predicate],
+    }
+}
+
+pub fn combine_equivalence_properties(
+    eq_properties: &mut Vec<Vec<Column>>,
+    new_condition: (&Column, &Column),
+) {
+    let mut idx1 = -1i32;
+    let mut idx2 = -1i32;
+    for (idx, prop) in eq_properties.iter_mut().enumerate() {
+        let contains_first = prop.contains(new_condition.0);
+        let contains_second = prop.contains(new_condition.1);
+        if contains_first && !contains_second {
+            prop.push(new_condition.1.clone());
+            idx1 = idx as i32;
+        } else if !contains_first && contains_second {
+            prop.push(new_condition.0.clone());
+            idx2 = idx as i32;
+        } else if contains_first && contains_second {
+            idx1 = idx as i32;
+            idx2 = idx as i32;
+            break;
+        }
+    }
+
+    if idx1 != -1 && idx2 != -1 && idx1 != idx2 {
+        // need to merge the two existing properties
+        let second_properties = eq_properties.get(idx2 as usize).unwrap().clone();
+        let first_properties = eq_properties.get_mut(idx1 as usize).unwrap();
+        for prop in second_properties {
+            first_properties.push(prop)
+        }
+        eq_properties.remove(idx2 as usize);
+    } else if idx1 == -1 && idx2 == -1 {
+        // adding new pairs
+        eq_properties.push(vec![new_condition.0.clone(), new_condition.1.clone()])
+    }
+}
+
+pub fn remove_equivalence_properties(
+    eq_properties: &mut Vec<Vec<Column>>,
+    remove_condition: (&Column, &Column),
+) {
+    let mut match_idx = -1i32;
+    for (idx, prop) in eq_properties.iter_mut().enumerate() {
+        let contains_first = prop.contains(remove_condition.0);
+        let contains_second = prop.contains(remove_condition.1);
+        if contains_first && contains_second {
+            match_idx = idx as i32;
+        }
+    }
+    if match_idx >= 0 {
+        let matches = eq_properties.get_mut(match_idx as usize).unwrap();
+        matches.retain(|e| (e != remove_condition.0 && e != remove_condition.1));
+        if matches.is_empty() {
+            eq_properties.remove(match_idx as usize);
+        }
+    }
+}
+
+pub fn merge_equivalence_properties_with_alias(
+    eq_properties: &mut Vec<Vec<Column>>,
+    alias_map: &HashMap<Column, Vec<Column>>,
+) {
+    for (column, columns) in alias_map {
+        let mut find_match = false;
+        for (_idx, prop) in eq_properties.iter_mut().enumerate() {
+            if prop.contains(column) {
+                prop.extend(columns.clone());
+                find_match = true;
+                break;
+            }
+        }
+        if !find_match {
+            let mut new_properties = vec![column.clone()];
+            new_properties.extend(columns.clone());
+            eq_properties.push(new_properties);
+        }
+    }
+}
+
+pub fn truncate_equivalence_properties_not_in_schema(
+    eq_properties: &mut Vec<Vec<Column>>,
+    schema: &SchemaRef,
+) {
+    for props in eq_properties.iter_mut() {
+        props.retain(|column| matches!(schema.index_of(column.name()), Ok(idx) if idx == column.index()))
+    }
+    eq_properties.retain(|props| !props.is_empty());
+}
+
+/// Normalize the output expressions based on Alias Map and SchemaRef.
+///
+/// 1) If there is mapping in Alias Map, replace the Column in the output expressions with the 1st Column in Alias Map
+/// 2) If the Column is invalid for the current Schema, replace the Column with a place holder Column with index = usize::MAX
+///
+pub fn normalize_out_expr_with_alias_schema(
+    expr: Arc<dyn PhysicalExpr>,
+    alias_map: &HashMap<Column, Vec<Column>>,
+    schema: &SchemaRef,
+) -> Arc<dyn PhysicalExpr> {
+    transform(expr.clone(), &|expr| {
+        let normalized_form: Option<Arc<dyn PhysicalExpr>> =
+            match expr.as_any().downcast_ref::<Column>() {
+                Some(column) => {
+                    let out = alias_map
+                        .get(column)
+                        .map(|c| {
+                            let out_col: Arc<dyn PhysicalExpr> = Arc::new(c[0].clone());
+                            out_col
+                        })
+                        .or_else(|| match schema.index_of(column.name()) {
+                            // Exactly matching, return None, no need to do the transform
+                            Ok(idx) if column.index() == idx => None,
+                            _ => {
+                                let out_col: Arc<dyn PhysicalExpr> =
+                                    Arc::new(Column::new(column.name(), usize::MAX));
+                                Some(out_col)
+                            }
+                        });
+                    out
+                }
+                None => None,
+            };
+        normalized_form
+    })
+    .unwrap_or(expr)
+}
+
+pub fn normalize_expr_with_equivalence_properties(
+    expr: Arc<dyn PhysicalExpr>,
+    eq_properties: &Vec<Vec<Column>>,
+) -> Arc<dyn PhysicalExpr> {
+    let mut normalized = expr.clone();
+    match expr.as_any().downcast_ref::<Column>() {
+        Some(column) => {
+            for prop in eq_properties {
+                if prop.contains(column) {
+                    normalized = Arc::new(prop.get(0).unwrap().clone());
+                    break;
+                }
+            }
+        }
+        None => {}
+    }
+    normalized
+}
+
+pub fn normalize_sort_expr_with_equivalence_properties(
+    sort_expr: PhysicalSortExpr,
+    eq_properties: &Vec<Vec<Column>>,
+) -> PhysicalSortExpr {
+    let mut normalized = sort_expr.clone();
+    match sort_expr.expr.as_any().downcast_ref::<Column>() {
+        Some(column) => {
+            for prop in eq_properties {
+                if prop.contains(column) {
+                    normalized = PhysicalSortExpr {
+                        expr: Arc::new(prop.get(0).unwrap().clone()),
+                        options: sort_expr.options,
+                    };
+                    break;
+                }
+            }
+        }
+        None => {}
+    }
+    normalized
+}
+
+pub fn down_cast_any_ref(any: &dyn Any) -> &dyn Any {
+    if any.is::<Arc<dyn PhysicalExpr>>() {
+        any.downcast_ref::<Arc<dyn PhysicalExpr>>()
+            .unwrap()
+            .as_any()
+    } else if any.is::<Box<dyn PhysicalExpr>>() {
+        any.downcast_ref::<Box<dyn PhysicalExpr>>()
+            .unwrap()
+            .as_any()
+    } else {
+        any
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use std::sync::Arc;
diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs
index 1350d49510d58..93968b60f8b2d 100644
--- a/datafusion/physical-expr/src/scalar_function.rs
+++ b/datafusion/physical-expr/src/scalar_function.rs
@@ -145,4 +145,26 @@ impl PhysicalExpr for ScalarFunctionExpr {
         let fun = self.fun.as_ref();
         (fun)(&inputs)
     }
+
+    fn children(&self) -> Vec<Arc<dyn PhysicalExpr>> {
+        self.args.clone()
+    }
+
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn PhysicalExpr>>,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        Ok(Arc::new(ScalarFunctionExpr::new(
+            &self.name,
+            self.fun.clone(),
+            children,
+            self.return_type(),
+        )))
+    }
+}
+
+impl PartialEq<dyn Any> for ScalarFunctionExpr {
+    fn eq(&self, _other: &dyn Any) -> bool {
+        false
+    }
 }
diff --git a/datafusion/physical-expr/src/sort_expr.rs b/datafusion/physical-expr/src/sort_expr.rs
index 79656725d4f44..a173cc8ba8b3b 100644
--- a/datafusion/physical-expr/src/sort_expr.rs
+++ b/datafusion/physical-expr/src/sort_expr.rs
@@ -33,6 +33,12 @@ pub struct PhysicalSortExpr {
     pub options: SortOptions,
 }
 
+impl PartialEq for PhysicalSortExpr {
+    fn eq(&self, other: &PhysicalSortExpr) -> bool {
+        self.options == other.options && self.expr.eq(&other.expr)
+    }
+}
+
 impl std::fmt::Display for PhysicalSortExpr {
     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
         let opts_string = match (self.options.descending, self.options.nulls_first) {
diff --git a/datafusion/physical-expr/src/utils.rs b/datafusion/physical-expr/src/utils.rs
new file mode 100644
index 0000000000000..2c9832adb1521
--- /dev/null
+++ b/datafusion/physical-expr/src/utils.rs
@@ -0,0 +1,117 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Collection of utility functions for Physical Expr optimization
+
+use crate::PhysicalExpr;
+use datafusion_common::DataFusionError;
+use std::result;
+use std::sync::Arc;
+
+pub type Result<T> = result::Result<T, DataFusionError>;
+
+/// Apply transform `F` to the PhysicalExpr's children, the transform `F` might have a direction(Preorder or Postorder)
+fn map_children<F>(
+    expr: Arc<dyn PhysicalExpr>,
+    transform: F,
+) -> Result<Arc<dyn PhysicalExpr>>
+where
+    F: Fn(Arc<dyn PhysicalExpr>) -> Result<Arc<dyn PhysicalExpr>>,
+{
+    if !expr.children().is_empty() {
+        let new_children: Result<Vec<_>> =
+            expr.children().into_iter().map(transform).collect();
+        with_new_children_if_necessary(expr, new_children?)
+    } else {
+        Ok(expr)
+    }
+}
+
+/// Convenience utils for writing optimizer rules: recursively apply the given `op` to the PhysicalExpr tree.
+/// When `op` does not apply to a given expr, it is left unchanged.
+/// The default tree traversal direction is transform_down(Preorder Traversal).
+pub fn transform<F>(expr: Arc<dyn PhysicalExpr>, op: &F) -> Result<Arc<dyn PhysicalExpr>>
+where
+    F: Fn(Arc<dyn PhysicalExpr>) -> Option<Arc<dyn PhysicalExpr>>,
+{
+    transform_down(expr, op)
+}
+
+/// Convenience utils for writing optimizer rules: recursively apply the given 'op' to the PhysicalExpr and all of its
+/// children(Preorder Traversal). When the `op` does not apply to a given PhysicalExpr, it is left unchanged.
+pub fn transform_down<F>(
+    expr: Arc<dyn PhysicalExpr>,
+    op: &F,
+) -> Result<Arc<dyn PhysicalExpr>>
+where
+    F: Fn(Arc<dyn PhysicalExpr>) -> Option<Arc<dyn PhysicalExpr>>,
+{
+    let expr_cloned = expr.clone();
+    let after_op = match op(expr_cloned) {
+        Some(value) => value,
+        None => expr,
+    };
+    map_children(after_op.clone(), |expr: Arc<dyn PhysicalExpr>| {
+        transform_down(expr, op)
+    })
+}
+
+/// Convenience utils for writing optimizer rules: recursively apply the given 'op' first to all of its
+/// children and then itself(Postorder Traversal). When the `op` does not apply to a given PhysicalExpr, it is left unchanged.
+#[allow(dead_code)]
+pub fn transform_up<F>(
+    expr: Arc<dyn PhysicalExpr>,
+    op: &F,
+) -> Result<Arc<dyn PhysicalExpr>>
+where
+    F: Fn(Arc<dyn PhysicalExpr>) -> Option<Arc<dyn PhysicalExpr>>,
+{
+    let after_op_children =
+        map_children(expr, |expr: Arc<dyn PhysicalExpr>| transform_up(expr, op))?;
+
+    let after_op_children_clone = after_op_children.clone();
+    let new_expr = match op(after_op_children) {
+        Some(value) => value,
+        None => after_op_children_clone,
+    };
+    Ok(new_expr)
+}
+
+/// Returns a copy of this expr if we change any child according to the pointer comparison.
+/// The size of `children` must be equal to the size of `PhysicalExpr::children()`.
+/// Allow the vtable address comparisons for PhysicalExpr Trait Objects, it is harmless even
+/// in the case of a 'false-negative'.
+#[allow(clippy::vtable_address_comparisons)]
+pub fn with_new_children_if_necessary(
+    expr: Arc<dyn PhysicalExpr>,
+    children: Vec<Arc<dyn PhysicalExpr>>,
+) -> Result<Arc<dyn PhysicalExpr>> {
+    if children.len() != expr.children().len() {
+        Err(DataFusionError::Internal(
+            "PhysicalExpr: Wrong number of children".to_string(),
+        ))
+    } else if children.is_empty()
+        || children
+            .iter()
+            .zip(expr.children().iter())
+            .any(|(c1, c2)| !Arc::ptr_eq(c1, c2))
+    {
+        expr.with_new_children(children)
+    } else {
+        Ok(expr)
+    }
+}

From 6d68a288949520ef5da7d679967817ce0099d1be Mon Sep 17 00:00:00 2001
From: Wang <mingmwang@ebay.com>
Date: Mon, 17 Oct 2022 13:56:21 +0800
Subject: [PATCH 02/12] fix testing folder

---
 datafusion/core/src/scheduler/pipeline/execution.rs | 5 +++++
 parquet-testing                                     | 2 +-
 testing                                             | 2 +-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/datafusion/core/src/scheduler/pipeline/execution.rs b/datafusion/core/src/scheduler/pipeline/execution.rs
index 58f535c1c54cf..dc129479ffe81 100644
--- a/datafusion/core/src/scheduler/pipeline/execution.rs
+++ b/datafusion/core/src/scheduler/pipeline/execution.rs
@@ -24,6 +24,7 @@ use std::task::{Context, Poll, Waker};
 
 use futures::{Stream, StreamExt};
 use parking_lot::Mutex;
+use datafusion_physical_expr::expressions::Column;
 
 use crate::arrow::datatypes::SchemaRef;
 use crate::arrow::{error::Result as ArrowResult, record_batch::RecordBatch};
@@ -247,6 +248,10 @@ impl ExecutionPlan for ProxyExecutionPlan {
         self.inner.prefer_parallel()
     }
 
+    fn equivalence_properties(&self) -> Vec<Vec<Column>> {
+        vec![]
+    }
+
     fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
         vec![]
     }
diff --git a/parquet-testing b/parquet-testing
index ddd898958803c..a11fc8f148f8a 160000
--- a/parquet-testing
+++ b/parquet-testing
@@ -1 +1 @@
-Subproject commit ddd898958803cb89b7156c6350584d1cda0fe8de
+Subproject commit a11fc8f148f8a7a89d9281cc0da3eb9d56095fbf
diff --git a/testing b/testing
index a8f7be3805317..5bab2f264a23f 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit a8f7be380531758eb7962542a5eb020d8795aa20
+Subproject commit 5bab2f264a23f5af68f69ea93d24ef1e8e77fc88

From b199d0f7afa5374d2891de2d3c449daf3546b813 Mon Sep 17 00:00:00 2001
From: Wang <mingmwang@ebay.com>
Date: Mon, 17 Oct 2022 14:00:21 +0800
Subject: [PATCH 03/12] fix fmt

---
 datafusion/core/src/scheduler/pipeline/execution.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datafusion/core/src/scheduler/pipeline/execution.rs b/datafusion/core/src/scheduler/pipeline/execution.rs
index dc129479ffe81..66257f29f7538 100644
--- a/datafusion/core/src/scheduler/pipeline/execution.rs
+++ b/datafusion/core/src/scheduler/pipeline/execution.rs
@@ -22,9 +22,9 @@ use std::pin::Pin;
 use std::sync::Arc;
 use std::task::{Context, Poll, Waker};
 
+use datafusion_physical_expr::expressions::Column;
 use futures::{Stream, StreamExt};
 use parking_lot::Mutex;
-use datafusion_physical_expr::expressions::Column;
 
 use crate::arrow::datatypes::SchemaRef;
 use crate::arrow::{error::Result as ArrowResult, record_batch::RecordBatch};

From eb011828c9eb4ad4a5c2f099d31205e34d8e4cb4 Mon Sep 17 00:00:00 2001
From: Wang <mingmwang@ebay.com>
Date: Mon, 17 Oct 2022 18:37:56 +0800
Subject: [PATCH 04/12] Add more UTs

---
 datafusion/core/src/dataframe.rs              |  27 ++
 datafusion/physical-expr/src/physical_expr.rs | 232 +++++++++++++++++-
 2 files changed, 254 insertions(+), 5 deletions(-)

diff --git a/datafusion/core/src/dataframe.rs b/datafusion/core/src/dataframe.rs
index c52f8259f2d04..7caa8c9e0aa6b 100644
--- a/datafusion/core/src/dataframe.rs
+++ b/datafusion/core/src/dataframe.rs
@@ -1474,4 +1474,31 @@ mod tests {
 
         Ok(())
     }
+
+    #[tokio::test]
+    async fn partition_aware_union() -> Result<()> {
+        let left = test_table().await?.select_columns(&["c1", "c2"])?;
+        let right = test_table_with_name("c2")
+            .await?
+            .select_columns(&["c1", "c3"])?
+            .with_column_renamed("c2.c1", "c2_c1")?;
+
+        let left_rows = left.collect().await?;
+        let right_rows = right.collect().await?;
+        let join1 =
+            left.join(right.clone(), JoinType::Inner, &["c1"], &["c2_c1"], None)?;
+        let join2 = left.join(right, JoinType::Inner, &["c1"], &["c2_c1"], None)?;
+
+        let union = join1.union(join2)?;
+
+        let union_rows = union.collect().await?;
+
+        assert_eq!(100, left_rows.iter().map(|x| x.num_rows()).sum::<usize>());
+        assert_eq!(100, right_rows.iter().map(|x| x.num_rows()).sum::<usize>());
+        assert_eq!(4016, union_rows.iter().map(|x| x.num_rows()).sum::<usize>());
+
+        let physical_plan = union.create_physical_plan().await?;
+        assert_eq!(physical_plan.output_partitioning().partition_count(), 8);
+        Ok(())
+    }
 }
diff --git a/datafusion/physical-expr/src/physical_expr.rs b/datafusion/physical-expr/src/physical_expr.rs
index caaec406ef74b..7d14b05bf63a0 100644
--- a/datafusion/physical-expr/src/physical_expr.rs
+++ b/datafusion/physical-expr/src/physical_expr.rs
@@ -132,9 +132,20 @@ pub fn expr_list_eq_any_order(
     list1: &[Arc<dyn PhysicalExpr>],
     list2: &[Arc<dyn PhysicalExpr>],
 ) -> bool {
-    list1.len() == list2.len()
-        && list1.iter().all(|e1| list2.iter().any(|e2| e2.eq(e1)))
-        && list2.iter().all(|e2| list1.iter().any(|e1| e1.eq(e2)))
+    if list1.len() == list2.len() {
+        let mut expr_vec1 = list1.to_vec();
+        let mut expr_vec2 = list2.to_vec();
+        while let Some(expr1) = expr_vec1.pop() {
+            if let Some(idx) = expr_vec2.iter().position(|expr2| expr1.eq(expr2)) {
+                expr_vec2.swap_remove(idx);
+            } else {
+                break;
+            }
+        }
+        expr_vec1.is_empty() && expr_vec2.is_empty()
+    } else {
+        false
+    }
 }
 
 /// Strictly compare the two sort expr lists in the given order.
@@ -168,6 +179,7 @@ pub fn split_predicate(predicate: &Arc<dyn PhysicalExpr>) -> Vec<&Arc<dyn Physic
     }
 }
 
+/// Combine the new equal condition with the existing equivalence properties.
 pub fn combine_equivalence_properties(
     eq_properties: &mut Vec<Vec<Column>>,
     new_condition: (&Column, &Column),
@@ -195,7 +207,9 @@ pub fn combine_equivalence_properties(
         let second_properties = eq_properties.get(idx2 as usize).unwrap().clone();
         let first_properties = eq_properties.get_mut(idx1 as usize).unwrap();
         for prop in second_properties {
-            first_properties.push(prop)
+            if !first_properties.contains(&prop) {
+                first_properties.push(prop)
+            }
         }
         eq_properties.remove(idx2 as usize);
     } else if idx1 == -1 && idx2 == -1 {
@@ -214,12 +228,13 @@ pub fn remove_equivalence_properties(
         let contains_second = prop.contains(remove_condition.1);
         if contains_first && contains_second {
             match_idx = idx as i32;
+            break;
         }
     }
     if match_idx >= 0 {
         let matches = eq_properties.get_mut(match_idx as usize).unwrap();
         matches.retain(|e| (e != remove_condition.0 && e != remove_condition.1));
-        if matches.is_empty() {
+        if matches.len() <= 1 {
             eq_properties.remove(match_idx as usize);
         }
     }
@@ -351,10 +366,13 @@ pub fn down_cast_any_ref(any: &dyn Any) -> &dyn Any {
 
 #[cfg(test)]
 mod tests {
+    use crate::expressions::Column;
+    use crate::PhysicalSortExpr;
     use std::sync::Arc;
 
     use super::*;
     use arrow::array::Int32Array;
+    use arrow::compute::SortOptions;
     use datafusion_common::Result;
 
     #[test]
@@ -422,4 +440,208 @@ mod tests {
         assert_eq!(&expected, result);
         Ok(())
     }
+
+    #[test]
+    fn expr_list_eq_any_order_test() -> Result<()> {
+        let list1: Vec<Arc<dyn PhysicalExpr>> = vec![
+            Arc::new(Column::new("a", 0)),
+            Arc::new(Column::new("a", 0)),
+            Arc::new(Column::new("b", 1)),
+        ];
+        let list2: Vec<Arc<dyn PhysicalExpr>> = vec![
+            Arc::new(Column::new("b", 1)),
+            Arc::new(Column::new("b", 1)),
+            Arc::new(Column::new("a", 0)),
+        ];
+        assert!(!expr_list_eq_any_order(list1.as_slice(), list2.as_slice()));
+        assert!(!expr_list_eq_any_order(list2.as_slice(), list1.as_slice()));
+
+        let list3: Vec<Arc<dyn PhysicalExpr>> = vec![
+            Arc::new(Column::new("a", 0)),
+            Arc::new(Column::new("b", 1)),
+            Arc::new(Column::new("c", 2)),
+            Arc::new(Column::new("a", 0)),
+            Arc::new(Column::new("b", 1)),
+        ];
+        let list4: Vec<Arc<dyn PhysicalExpr>> = vec![
+            Arc::new(Column::new("b", 1)),
+            Arc::new(Column::new("b", 1)),
+            Arc::new(Column::new("a", 0)),
+            Arc::new(Column::new("c", 2)),
+            Arc::new(Column::new("a", 0)),
+        ];
+        assert!(expr_list_eq_any_order(list3.as_slice(), list4.as_slice()));
+        assert!(expr_list_eq_any_order(list4.as_slice(), list3.as_slice()));
+        assert!(expr_list_eq_any_order(list3.as_slice(), list3.as_slice()));
+        assert!(expr_list_eq_any_order(list4.as_slice(), list4.as_slice()));
+
+        Ok(())
+    }
+
+    #[test]
+    fn sort_expr_list_eq_strict_order_test() -> Result<()> {
+        let list1: Vec<PhysicalSortExpr> = vec![
+            PhysicalSortExpr {
+                expr: Arc::new(Column::new("a", 0)),
+                options: SortOptions::default(),
+            },
+            PhysicalSortExpr {
+                expr: Arc::new(Column::new("a", 0)),
+                options: SortOptions::default(),
+            },
+            PhysicalSortExpr {
+                expr: Arc::new(Column::new("b", 1)),
+                options: SortOptions::default(),
+            },
+        ];
+
+        let list2: Vec<PhysicalSortExpr> = vec![
+            PhysicalSortExpr {
+                expr: Arc::new(Column::new("b", 1)),
+                options: SortOptions::default(),
+            },
+            PhysicalSortExpr {
+                expr: Arc::new(Column::new("a", 0)),
+                options: SortOptions::default(),
+            },
+            PhysicalSortExpr {
+                expr: Arc::new(Column::new("a", 0)),
+                options: SortOptions::default(),
+            },
+        ];
+
+        assert!(!sort_expr_list_eq_strict_order(
+            list1.as_slice(),
+            list2.as_slice()
+        ));
+        assert!(!sort_expr_list_eq_strict_order(
+            list2.as_slice(),
+            list1.as_slice()
+        ));
+
+        let list3: Vec<PhysicalSortExpr> = vec![
+            PhysicalSortExpr {
+                expr: Arc::new(Column::new("a", 0)),
+                options: SortOptions::default(),
+            },
+            PhysicalSortExpr {
+                expr: Arc::new(Column::new("b", 1)),
+                options: SortOptions::default(),
+            },
+            PhysicalSortExpr {
+                expr: Arc::new(Column::new("c", 2)),
+                options: SortOptions::default(),
+            },
+        ];
+        let list4: Vec<PhysicalSortExpr> = vec![
+            PhysicalSortExpr {
+                expr: Arc::new(Column::new("a", 0)),
+                options: SortOptions::default(),
+            },
+            PhysicalSortExpr {
+                expr: Arc::new(Column::new("b", 1)),
+                options: SortOptions::default(),
+            },
+            PhysicalSortExpr {
+                expr: Arc::new(Column::new("c", 2)),
+                options: SortOptions::default(),
+            },
+        ];
+
+        assert!(sort_expr_list_eq_strict_order(
+            list3.as_slice(),
+            list4.as_slice()
+        ));
+        assert!(sort_expr_list_eq_strict_order(
+            list4.as_slice(),
+            list3.as_slice()
+        ));
+        assert!(sort_expr_list_eq_strict_order(
+            list3.as_slice(),
+            list3.as_slice()
+        ));
+        assert!(sort_expr_list_eq_strict_order(
+            list4.as_slice(),
+            list4.as_slice()
+        ));
+
+        Ok(())
+    }
+
+    #[test]
+    fn combine_equivalence_properties_test() -> Result<()> {
+        let mut eq_properties: Vec<Vec<Column>> = vec![];
+        let new_condition = (&Column::new("a", 0), &Column::new("b", 1));
+        combine_equivalence_properties(&mut eq_properties, new_condition);
+        assert_eq!(eq_properties.len(), 1);
+
+        let new_condition = (&Column::new("b", 1), &Column::new("a", 0));
+        combine_equivalence_properties(&mut eq_properties, new_condition);
+        assert_eq!(eq_properties.len(), 1);
+        assert_eq!(eq_properties[0].len(), 2);
+
+        let new_condition = (&Column::new("b", 1), &Column::new("c", 2));
+        combine_equivalence_properties(&mut eq_properties, new_condition);
+        assert_eq!(eq_properties.len(), 1);
+        assert_eq!(eq_properties[0].len(), 3);
+
+        let new_condition = (&Column::new("x", 99), &Column::new("y", 100));
+        combine_equivalence_properties(&mut eq_properties, new_condition);
+        assert_eq!(eq_properties.len(), 2);
+
+        let new_condition = (&Column::new("x", 99), &Column::new("a", 0));
+        combine_equivalence_properties(&mut eq_properties, new_condition);
+        assert_eq!(eq_properties.len(), 1);
+        assert_eq!(eq_properties[0].len(), 5);
+
+        Ok(())
+    }
+
+    #[test]
+    fn remove_equivalence_properties_test() -> Result<()> {
+        let mut eq_properties: Vec<Vec<Column>> = vec![];
+        let remove_condition = (&Column::new("a", 0), &Column::new("b", 1));
+        remove_equivalence_properties(&mut eq_properties, remove_condition);
+        assert_eq!(eq_properties.len(), 0);
+
+        let new_condition = (&Column::new("a", 0), &Column::new("b", 1));
+        combine_equivalence_properties(&mut eq_properties, new_condition);
+        let new_condition = (&Column::new("a", 0), &Column::new("c", 2));
+        combine_equivalence_properties(&mut eq_properties, new_condition);
+        let new_condition = (&Column::new("c", 2), &Column::new("d", 3));
+        combine_equivalence_properties(&mut eq_properties, new_condition);
+        assert_eq!(eq_properties.len(), 1);
+
+        let remove_condition = (&Column::new("a", 0), &Column::new("b", 1));
+        remove_equivalence_properties(&mut eq_properties, remove_condition);
+        assert_eq!(eq_properties.len(), 1);
+        assert_eq!(eq_properties[0].len(), 2);
+
+        Ok(())
+    }
+
+    #[test]
+    fn merge_equivalence_properties_with_alias_test() -> Result<()> {
+        let mut eq_properties: Vec<Vec<Column>> = vec![];
+        let mut alias_map = HashMap::new();
+        alias_map.insert(
+            Column::new("a", 0),
+            vec![Column::new("a1", 1), Column::new("a2", 2)],
+        );
+
+        merge_equivalence_properties_with_alias(&mut eq_properties, &alias_map);
+        assert_eq!(eq_properties.len(), 1);
+        assert_eq!(eq_properties[0].len(), 3);
+
+        let mut alias_map = HashMap::new();
+        alias_map.insert(
+            Column::new("a", 0),
+            vec![Column::new("a3", 1), Column::new("a4", 2)],
+        );
+        merge_equivalence_properties_with_alias(&mut eq_properties, &alias_map);
+        assert_eq!(eq_properties.len(), 1);
+        assert_eq!(eq_properties[0].len(), 5);
+
+        Ok(())
+    }
 }

From e60c206caefad6222132dba00c6fa431ab95f2b9 Mon Sep 17 00:00:00 2001
From: Wang <mingmwang@ebay.com>
Date: Mon, 17 Oct 2022 19:05:34 +0800
Subject: [PATCH 05/12] update Cargo.toml in physical-expr

---
 datafusion/physical-expr/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml
index 28dce406f778b..7f19c59ae5679 100644
--- a/datafusion/physical-expr/Cargo.toml
+++ b/datafusion/physical-expr/Cargo.toml
@@ -56,4 +56,4 @@ rand = "0.8"
 regex = { version = "^1.4.3", optional = true }
 sha2 = { version = "^0.10.1", optional = true }
 unicode-segmentation = { version = "^1.7.1", optional = true }
-itertools = "0.10.3"
+itertools = "0.10"

From ec36e12c2c93f59cfa1ec8fc0e6773452a0c5628 Mon Sep 17 00:00:00 2001
From: Wang <mingmwang@ebay.com>
Date: Mon, 17 Oct 2022 22:59:20 +0800
Subject: [PATCH 06/12] Update lock and fix clippy

---
 datafusion-cli/Cargo.lock            | 33 ++++++++++++++--------------
 datafusion-cli/src/main.rs           |  2 +-
 datafusion-cli/src/object_storage.rs | 12 +++++-----
 datafusion/core/src/dataframe.rs     |  3 ++-
 4 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index 29ddffd6ce9a3..e6b32f1f4626a 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -498,9 +498,9 @@ dependencies = [
 
 [[package]]
 name = "cxx"
-version = "1.0.78"
+version = "1.0.79"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "19f39818dcfc97d45b03953c1292efc4e80954e1583c4aa770bac1383e2310a4"
+checksum = "3f83d0ebf42c6eafb8d7c52f7e5f2d3003b89c7aa4fd2b79229209459a849af8"
 dependencies = [
  "cc",
  "cxxbridge-flags",
@@ -510,9 +510,9 @@ dependencies = [
 
 [[package]]
 name = "cxx-build"
-version = "1.0.78"
+version = "1.0.79"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3e580d70777c116df50c390d1211993f62d40302881e54d4b79727acb83d0199"
+checksum = "07d050484b55975889284352b0ffc2ecbda25c0c55978017c132b29ba0818a86"
 dependencies = [
  "cc",
  "codespan-reporting",
@@ -525,15 +525,15 @@ dependencies = [
 
 [[package]]
 name = "cxxbridge-flags"
-version = "1.0.78"
+version = "1.0.79"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56a46460b88d1cec95112c8c363f0e2c39afdb237f60583b0b36343bf627ea9c"
+checksum = "99d2199b00553eda8012dfec8d3b1c75fce747cf27c169a270b3b99e3448ab78"
 
 [[package]]
 name = "cxxbridge-macro"
-version = "1.0.78"
+version = "1.0.79"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "747b608fecf06b0d72d440f27acc99288207324b793be2c17991839f3d4995ea"
+checksum = "dcb67a6de1f602736dd7eaead0080cf3435df806c61b24b13328db128c58868f"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -648,6 +648,7 @@ dependencies = [
  "datafusion-expr",
  "datafusion-row",
  "hashbrown",
+ "itertools",
  "lazy_static",
  "md-5",
  "ordered-float 3.2.0",
@@ -1119,9 +1120,9 @@ dependencies = [
 
 [[package]]
 name = "iana-time-zone-haiku"
-version = "0.1.0"
+version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fde6edd6cef363e9359ed3c98ba64590ba9eecba2293eb5a723ab32aee8926aa"
+checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca"
 dependencies = [
  "cxx",
  "cxx-build",
@@ -1297,9 +1298,9 @@ checksum = "292a948cd991e376cf75541fe5b97a1081d713c618b4f1b9500f8844e49eb565"
 
 [[package]]
 name = "libmimalloc-sys"
-version = "0.1.25"
+version = "0.1.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11ca136052550448f55df7898c6dbe651c6b574fe38a0d9ea687a9f8088a2e2c"
+checksum = "8fc093ab289b0bfda3aa1bdfab9c9542be29c7ef385cfcbe77f8c9813588eb48"
 dependencies = [
  "cc",
 ]
@@ -1375,9 +1376,9 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
 
 [[package]]
 name = "mimalloc"
-version = "0.1.29"
+version = "0.1.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2f64ad83c969af2e732e907564deb0d0ed393cec4af80776f77dd77a1a427698"
+checksum = "76ce6a4b40d3bff9eb3ce9881ca0737a85072f9f975886082640cd46a75cdb35"
 dependencies = [
  "libmimalloc-sys",
 ]
@@ -1711,9 +1712,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.46"
+version = "1.0.47"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "94e2ef8dbfc347b10c094890f778ee2e36ca9bb4262e86dc99cd217e35f3470b"
+checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725"
 dependencies = [
  "unicode-ident",
 ]
diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs
index cbb74bbf9b4ce..0944564f88928 100644
--- a/datafusion-cli/src/main.rs
+++ b/datafusion-cli/src/main.rs
@@ -147,7 +147,7 @@ fn create_runtime_env() -> Result<RuntimeEnv> {
         ObjectStoreRegistry::new_with_provider(Some(Arc::new(object_store_provider)));
     let rn_config =
         RuntimeConfig::new().with_object_store_registry(Arc::new(object_store_registry));
-    return RuntimeEnv::new(rn_config);
+    RuntimeEnv::new(rn_config)
 }
 
 fn is_valid_file(dir: &str) -> std::result::Result<(), String> {
diff --git a/datafusion-cli/src/object_storage.rs b/datafusion-cli/src/object_storage.rs
index 19993e751c26d..df5235328a2cc 100644
--- a/datafusion-cli/src/object_storage.rs
+++ b/datafusion-cli/src/object_storage.rs
@@ -68,7 +68,7 @@ fn build_gcs_object_store(url: &Url) -> Result<Arc<dyn object_store::ObjectStore
     let host = get_host_name(url)?;
     let mut builder = GoogleCloudStorageBuilder::new().with_bucket_name(host);
 
-    if let Some(path) = env::var("GCP_SERVICE_ACCOUNT_PATH").ok() {
+    if let Ok(path) = env::var("GCP_SERVICE_ACCOUNT_PATH") {
         builder = builder.with_service_account_path(path);
     }
     match builder.build() {
@@ -78,10 +78,12 @@ fn build_gcs_object_store(url: &Url) -> Result<Arc<dyn object_store::ObjectStore
 }
 
 fn get_host_name(url: &Url) -> Result<&str> {
-    url.host_str().ok_or(DataFusionError::Execution(format!(
-        "Not able to parse hostname from url, {}",
-        url.as_str()
-    )))
+    url.host_str().ok_or_else(|| {
+        DataFusionError::Execution(format!(
+            "Not able to parse hostname from url, {}",
+            url.as_str()
+        ))
+    })
 }
 
 #[cfg(test)]
diff --git a/datafusion/core/src/dataframe.rs b/datafusion/core/src/dataframe.rs
index 7caa8c9e0aa6b..607a3770f2e04 100644
--- a/datafusion/core/src/dataframe.rs
+++ b/datafusion/core/src/dataframe.rs
@@ -1498,7 +1498,8 @@ mod tests {
         assert_eq!(4016, union_rows.iter().map(|x| x.num_rows()).sum::<usize>());
 
         let physical_plan = union.create_physical_plan().await?;
-        assert_eq!(physical_plan.output_partitioning().partition_count(), 8);
+        let partition_count = SessionContext::new().copied_config().target_partitions;
+        assert_eq!(physical_plan.output_partitioning().partition_count(), partition_count);
         Ok(())
     }
 }

From 96496be7f88824dcb721fa9686735268c0f40cf7 Mon Sep 17 00:00:00 2001
From: Wang <mingmwang@ebay.com>
Date: Mon, 17 Oct 2022 23:06:17 +0800
Subject: [PATCH 07/12] fix fmt

---
 datafusion/core/src/dataframe.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/datafusion/core/src/dataframe.rs b/datafusion/core/src/dataframe.rs
index 607a3770f2e04..b41f3d29607d6 100644
--- a/datafusion/core/src/dataframe.rs
+++ b/datafusion/core/src/dataframe.rs
@@ -1499,7 +1499,10 @@ mod tests {
 
         let physical_plan = union.create_physical_plan().await?;
         let partition_count = SessionContext::new().copied_config().target_partitions;
-        assert_eq!(physical_plan.output_partitioning().partition_count(), partition_count);
+        assert_eq!(
+            physical_plan.output_partitioning().partition_count(),
+            partition_count
+        );
         Ok(())
     }
 }

From 4d924372c3cee8752a3961250c4b323da9ca738a Mon Sep 17 00:00:00 2001
From: Wang <mingmwang@ebay.com>
Date: Tue, 18 Oct 2022 10:40:38 +0800
Subject: [PATCH 08/12] fix clippy and cargo fmt

---
 datafusion/physical-expr/Cargo.toml           |  2 +-
 datafusion/physical-expr/src/physical_expr.rs | 32 ++++++++-----------
 2 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml
index 7f19c59ae5679..7e609547a6365 100644
--- a/datafusion/physical-expr/Cargo.toml
+++ b/datafusion/physical-expr/Cargo.toml
@@ -56,4 +56,4 @@ rand = "0.8"
 regex = { version = "^1.4.3", optional = true }
 sha2 = { version = "^0.10.1", optional = true }
 unicode-segmentation = { version = "^1.7.1", optional = true }
-itertools = "0.10"
+itertools = { version = "0.10", features = ["use_std"] }
diff --git a/datafusion/physical-expr/src/physical_expr.rs b/datafusion/physical-expr/src/physical_expr.rs
index 7d14b05bf63a0..f14fda9f881e3 100644
--- a/datafusion/physical-expr/src/physical_expr.rs
+++ b/datafusion/physical-expr/src/physical_expr.rs
@@ -314,16 +314,13 @@ pub fn normalize_expr_with_equivalence_properties(
     eq_properties: &Vec<Vec<Column>>,
 ) -> Arc<dyn PhysicalExpr> {
     let mut normalized = expr.clone();
-    match expr.as_any().downcast_ref::<Column>() {
-        Some(column) => {
-            for prop in eq_properties {
-                if prop.contains(column) {
-                    normalized = Arc::new(prop.get(0).unwrap().clone());
-                    break;
-                }
+    if let Some(column) = expr.as_any().downcast_ref::<Column>() {
+        for prop in eq_properties {
+            if prop.contains(column) {
+                normalized = Arc::new(prop.get(0).unwrap().clone());
+                break;
             }
         }
-        None => {}
     }
     normalized
 }
@@ -333,19 +330,16 @@ pub fn normalize_sort_expr_with_equivalence_properties(
     eq_properties: &Vec<Vec<Column>>,
 ) -> PhysicalSortExpr {
     let mut normalized = sort_expr.clone();
-    match sort_expr.expr.as_any().downcast_ref::<Column>() {
-        Some(column) => {
-            for prop in eq_properties {
-                if prop.contains(column) {
-                    normalized = PhysicalSortExpr {
-                        expr: Arc::new(prop.get(0).unwrap().clone()),
-                        options: sort_expr.options,
-                    };
-                    break;
-                }
+    if let Some(column) = sort_expr.expr.as_any().downcast_ref::<Column>() {
+        for prop in eq_properties {
+            if prop.contains(column) {
+                normalized = PhysicalSortExpr {
+                    expr: Arc::new(prop.get(0).unwrap().clone()),
+                    options: sort_expr.options,
+                };
+                break;
             }
         }
-        None => {}
     }
     normalized
 }

From d80ff107c77da6ec13a08952e8a2dd8216416b5a Mon Sep 17 00:00:00 2001
From: Wang <mingmwang@ebay.com>
Date: Tue, 18 Oct 2022 11:12:24 +0800
Subject: [PATCH 09/12] fix cargo fmt

---
 datafusion/physical-expr/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml
index 7e609547a6365..9964ec58f4595 100644
--- a/datafusion/physical-expr/Cargo.toml
+++ b/datafusion/physical-expr/Cargo.toml
@@ -48,6 +48,7 @@ datafusion-common = { path = "../common", version = "13.0.0" }
 datafusion-expr = { path = "../expr", version = "13.0.0" }
 datafusion-row = { path = "../row", version = "13.0.0" }
 hashbrown = { version = "0.12", features = ["raw"] }
+itertools = { version = "0.10", features = ["use_std"] }
 lazy_static = { version = "^1.4.0" }
 md-5 = { version = "^0.10.0", optional = true }
 ordered-float = "3.0"
@@ -56,4 +57,3 @@ rand = "0.8"
 regex = { version = "^1.4.3", optional = true }
 sha2 = { version = "^0.10.1", optional = true }
 unicode-segmentation = { version = "^1.7.1", optional = true }
-itertools = { version = "0.10", features = ["use_std"] }

From 808a73730c8594adba6e84f6aaccb3d405340f8f Mon Sep 17 00:00:00 2001
From: Wang <mingmwang@ebay.com>
Date: Thu, 20 Oct 2022 16:57:17 +0800
Subject: [PATCH 10/12] combine transform utils methods to Trait

---
 .../physical_optimizer/coalesce_batches.rs    |   5 +-
 .../src/physical_optimizer/enforcement.rs     |   4 +-
 .../core/src/physical_optimizer/utils.rs      |  71 ------
 datafusion/core/src/physical_plan/mod.rs      | 140 +++++++++++
 datafusion/physical-expr/src/lib.rs           |   1 -
 datafusion/physical-expr/src/physical_expr.rs | 222 +++++++++++++++---
 datafusion/physical-expr/src/utils.rs         | 117 ---------
 7 files changed, 339 insertions(+), 221 deletions(-)
 delete mode 100644 datafusion/physical-expr/src/utils.rs

diff --git a/datafusion/core/src/physical_optimizer/coalesce_batches.rs b/datafusion/core/src/physical_optimizer/coalesce_batches.rs
index ac7ca388f90cc..104b6d18619ba 100644
--- a/datafusion/core/src/physical_optimizer/coalesce_batches.rs
+++ b/datafusion/core/src/physical_optimizer/coalesce_batches.rs
@@ -23,11 +23,10 @@ use crate::{
     physical_optimizer::PhysicalOptimizerRule,
     physical_plan::{
         coalesce_batches::CoalesceBatchesExec, filter::FilterExec,
-        hash_join::HashJoinExec, repartition::RepartitionExec,
+        hash_join::HashJoinExec, repartition::RepartitionExec, TreeNodeRewritable,
     },
 };
 
-use crate::physical_optimizer::utils::transform_up;
 use std::sync::Arc;
 
 /// Optimizer rule that introduces CoalesceBatchesExec to avoid overhead with small batches that
@@ -52,7 +51,7 @@ impl PhysicalOptimizerRule for CoalesceBatches {
         _config: &crate::execution::context::SessionConfig,
     ) -> Result<Arc<dyn crate::physical_plan::ExecutionPlan>> {
         let target_batch_size = self.target_batch_size;
-        transform_up(plan, &|plan| {
+        plan.transform_up(&|plan| {
             let plan_any = plan.as_any();
             // The goal here is to detect operators that could produce small batches and only
             // wrap those ones with a CoalesceBatchesExec operator. An alternate approach here
diff --git a/datafusion/core/src/physical_optimizer/enforcement.rs b/datafusion/core/src/physical_optimizer/enforcement.rs
index b559d0a00b818..17b081cc74416 100644
--- a/datafusion/core/src/physical_optimizer/enforcement.rs
+++ b/datafusion/core/src/physical_optimizer/enforcement.rs
@@ -19,11 +19,11 @@
 //! requirements are met by inserting necessary [[RepartitionExec]] and [[SortExec]].
 //!
 use crate::error::Result;
-use crate::physical_optimizer::utils::transform_up;
 use crate::physical_optimizer::PhysicalOptimizerRule;
 use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec;
 use crate::physical_plan::repartition::RepartitionExec;
 use crate::physical_plan::sorts::sort::SortExec;
+use crate::physical_plan::TreeNodeRewritable;
 use crate::physical_plan::{with_new_children_if_necessary, Distribution, ExecutionPlan};
 use crate::prelude::SessionConfig;
 use datafusion_physical_expr::expressions::Column;
@@ -58,7 +58,7 @@ impl PhysicalOptimizerRule for BasicEnforcement {
     ) -> Result<Arc<dyn ExecutionPlan>> {
         // Distribution and Ordering enforcement need to be applied bottom-up.
         let target_partitions = config.target_partitions;
-        transform_up(plan, &{
+        plan.transform_up(&{
             |plan| Some(ensure_distribution_and_ordering(plan, target_partitions))
         })
     }
diff --git a/datafusion/core/src/physical_optimizer/utils.rs b/datafusion/core/src/physical_optimizer/utils.rs
index 18fb7e2c63e11..4aceb776d7d5b 100644
--- a/datafusion/core/src/physical_optimizer/utils.rs
+++ b/datafusion/core/src/physical_optimizer/utils.rs
@@ -45,74 +45,3 @@ pub fn optimize_children(
         with_new_children_if_necessary(plan, children)
     }
 }
-
-/// Apply transform `F` to the plan's children, the transform `F` might have a direction(Preorder or Postorder)
-fn map_children<F>(
-    plan: Arc<dyn ExecutionPlan>,
-    transform: F,
-) -> Result<Arc<dyn ExecutionPlan>>
-where
-    F: Fn(Arc<dyn ExecutionPlan>) -> Result<Arc<dyn ExecutionPlan>>,
-{
-    if !plan.children().is_empty() {
-        let new_children: Result<Vec<_>> =
-            plan.children().into_iter().map(transform).collect();
-        with_new_children_if_necessary(plan, new_children?)
-    } else {
-        Ok(plan)
-    }
-}
-
-/// Convenience utils for writing optimizers rule: recursively apply the given `op` to the plan tree.
-/// When `op` does not apply to a given plan, it is left unchanged.
-/// The default tree traversal direction is transform_down(Preorder Traversal).
-#[allow(dead_code)]
-pub fn transform<F>(
-    plan: Arc<dyn ExecutionPlan>,
-    op: &F,
-) -> Result<Arc<dyn ExecutionPlan>>
-where
-    F: Fn(Arc<dyn ExecutionPlan>) -> Option<Arc<dyn ExecutionPlan>>,
-{
-    transform_down(plan, op)
-}
-
-/// Convenience utils for writing optimizers rule: recursively apply the given 'op' to the plan and all of its
-/// children(Preorder Traversal). When the `op` does not apply to a given plan, it is left unchanged.
-#[allow(dead_code)]
-pub fn transform_down<F>(
-    plan: Arc<dyn ExecutionPlan>,
-    op: &F,
-) -> Result<Arc<dyn ExecutionPlan>>
-where
-    F: Fn(Arc<dyn ExecutionPlan>) -> Option<Arc<dyn ExecutionPlan>>,
-{
-    let plan_cloned = plan.clone();
-    let after_op = match op(plan_cloned) {
-        Some(value) => value,
-        None => plan,
-    };
-    map_children(after_op.clone(), |plan: Arc<dyn ExecutionPlan>| {
-        transform_down(plan, op)
-    })
-}
-
-/// Convenience utils for writing optimizers rule: recursively apply the given 'op' first to all of its
-/// children and then itself(Postorder Traversal). When the `op` does not apply to a given plan, it is left unchanged.
-pub fn transform_up<F>(
-    plan: Arc<dyn ExecutionPlan>,
-    op: &F,
-) -> Result<Arc<dyn ExecutionPlan>>
-where
-    F: Fn(Arc<dyn ExecutionPlan>) -> Option<Arc<dyn ExecutionPlan>>,
-{
-    let after_op_children =
-        map_children(plan, |plan: Arc<dyn ExecutionPlan>| transform_up(plan, op))?;
-
-    let after_op_children_clone = after_op_children.clone();
-    let new_plan = match op(after_op_children) {
-        Some(value) => value,
-        None => after_op_children_clone,
-    };
-    Ok(new_plan)
-}
diff --git a/datafusion/core/src/physical_plan/mod.rs b/datafusion/core/src/physical_plan/mod.rs
index be07837bd84bb..bc34f8bf1cdeb 100644
--- a/datafusion/core/src/physical_plan/mod.rs
+++ b/datafusion/core/src/physical_plan/mod.rs
@@ -501,6 +501,146 @@ pub fn visit_execution_plan<V: ExecutionPlanVisitor>(
     Ok(())
 }
 
+/// a Trait for marking tree node types that are rewritable
+pub trait TreeNodeRewritable: Clone {
+    /// Transform the tree node using the given [TreeNodeRewriter]
+    /// It performs a depth-first walk of a node and its children.
+    ///
+    /// For a node tree such as
+    /// ```text
+    /// ParentNode
+    ///    left: ChildNode1
+    ///    right: ChildNode2
+    /// ```
+    ///
+    /// The nodes are visited using the following order
+    /// ```text
+    /// pre_visit(ParentNode)
+    /// pre_visit(ChildNode1)
+    /// mutate(ChildNode1)
+    /// pre_visit(ChildNode2)
+    /// mutate(ChildNode2)
+    /// mutate(ParentNode)
+    /// ```
+    ///
+    /// If an Err result is returned, recursion is stopped immediately
+    ///
+    /// If [`RewriteRecursion::Stop`] is returned on a call to pre_visit, no
+    /// children of that node are visited, nor is mutate
+    /// called on that node
+    ///
+    fn transform_using<R: TreeNodeRewriter<Self>>(
+        self,
+        rewriter: &mut R,
+    ) -> Result<Self> {
+        let need_mutate = match rewriter.pre_visit(&self)? {
+            RewriteRecursion::Mutate => return rewriter.mutate(self),
+            RewriteRecursion::Stop => return Ok(self),
+            RewriteRecursion::Continue => true,
+            RewriteRecursion::Skip => false,
+        };
+
+        let after_op_children =
+            self.map_children(|node| node.transform_using(rewriter))?;
+
+        // now rewrite this node itself
+        if need_mutate {
+            rewriter.mutate(after_op_children)
+        } else {
+            Ok(after_op_children)
+        }
+    }
+
+    /// Convenience utils for writing optimizers rule: recursively apply the given `op` to the node tree.
+    /// When `op` does not apply to a given node, it is left unchanged.
+    /// The default tree traversal direction is transform_up(Postorder Traversal).
+    fn transform<F>(self, op: &F) -> Result<Self>
+    where
+        F: Fn(Self) -> Option<Self>,
+    {
+        self.transform_up(op)
+    }
+
+    /// Convenience utils for writing optimizers rule: recursively apply the given 'op' to the node and all of its
+    /// children(Preorder Traversal).
+    /// When the `op` does not apply to a given node, it is left unchanged.
+    fn transform_down<F>(self, op: &F) -> Result<Self>
+    where
+        F: Fn(Self) -> Option<Self>,
+    {
+        let node_cloned = self.clone();
+        let after_op = match op(node_cloned) {
+            Some(value) => value,
+            None => self,
+        };
+        after_op.map_children(|node| node.transform_down(op))
+    }
+
+    /// Convenience utils for writing optimizers rule: recursively apply the given 'op' first to all of its
+    /// children and then itself(Postorder Traversal).
+    /// When the `op` does not apply to a given node, it is left unchanged.
+    fn transform_up<F>(self, op: &F) -> Result<Self>
+    where
+        F: Fn(Self) -> Option<Self>,
+    {
+        let after_op_children = self.map_children(|node| node.transform_up(op))?;
+
+        let after_op_children_clone = after_op_children.clone();
+        let new_node = match op(after_op_children) {
+            Some(value) => value,
+            None => after_op_children_clone,
+        };
+        Ok(new_node)
+    }
+
+    /// Apply transform `F` to the node's children, the transform `F` might have a direction(Preorder or Postorder)
+    fn map_children<F>(self, transform: F) -> Result<Self>
+    where
+        F: FnMut(Self) -> Result<Self>;
+}
+
+/// Trait for potentially recursively transforming a [`TreeNodeRewritable`] node
+/// tree. When passed to `TreeNodeRewritable::transform_using`, `TreeNodeRewriter::mutate` is
+/// invoked recursively on all nodes of a tree.
+pub trait TreeNodeRewriter<N: TreeNodeRewritable>: Sized {
+    /// Invoked before (Preorder) any children of `node` are rewritten /
+    /// visited. Default implementation returns `Ok(RewriteRecursion::Continue)`
+    fn pre_visit(&mut self, _node: &N) -> Result<RewriteRecursion> {
+        Ok(RewriteRecursion::Continue)
+    }
+
+    /// Invoked after (Postorder) all children of `node` have been mutated and
+    /// returns a potentially modified node.
+    fn mutate(&mut self, node: N) -> Result<N>;
+}
+
+/// Controls how the [TreeNodeRewriter] recursion should proceed.
+pub enum RewriteRecursion {
+    /// Continue rewrite / visit this node tree.
+    Continue,
+    /// Call `mutate` on this node immediately and return, without visiting its children.
+    Mutate,
+    /// Do not rewrite / visit this node or its children.
+    Stop,
+    /// Keep recursing into the children but skip applying `mutate` on this node.
+    Skip,
+}
+
+impl TreeNodeRewritable for Arc<dyn ExecutionPlan> {
+    fn map_children<F>(self, transform: F) -> Result<Self>
+    where
+        F: FnMut(Self) -> Result<Self>,
+    {
+        if !self.children().is_empty() {
+            let new_children: Result<Vec<_>> =
+                self.children().into_iter().map(transform).collect();
+            with_new_children_if_necessary(self, new_children?)
+        } else {
+            Ok(self)
+        }
+    }
+}
+
 /// Execute the [ExecutionPlan] and collect the results in memory
 pub async fn collect(
     plan: Arc<dyn ExecutionPlan>,
diff --git a/datafusion/physical-expr/src/lib.rs b/datafusion/physical-expr/src/lib.rs
index 861911bb8e880..0578ae26dcf48 100644
--- a/datafusion/physical-expr/src/lib.rs
+++ b/datafusion/physical-expr/src/lib.rs
@@ -37,7 +37,6 @@ pub mod type_coercion;
 pub mod udf;
 #[cfg(feature = "unicode_expressions")]
 pub mod unicode_expressions;
-mod utils;
 pub mod var_provider;
 pub mod window;
 
diff --git a/datafusion/physical-expr/src/physical_expr.rs b/datafusion/physical-expr/src/physical_expr.rs
index f14fda9f881e3..e3141dedf8ba5 100644
--- a/datafusion/physical-expr/src/physical_expr.rs
+++ b/datafusion/physical-expr/src/physical_expr.rs
@@ -18,14 +18,13 @@
 use arrow::datatypes::{DataType, Schema, SchemaRef};
 
 use arrow::record_batch::RecordBatch;
-
+use datafusion_common::DataFusionError;
 use datafusion_common::Result;
 
 use datafusion_expr::{ColumnarValue, Operator};
 use std::fmt::{Debug, Display};
 
 use crate::expressions::{BinaryExpr, Column};
-use crate::utils::transform;
 use crate::PhysicalSortExpr;
 use arrow::array::{make_array, Array, ArrayRef, BooleanArray, MutableArrayData};
 use arrow::compute::{and_kleene, filter_record_batch, is_not_null, SlicesIterator};
@@ -77,6 +76,172 @@ pub trait PhysicalExpr: Send + Sync + Display + Debug + PartialEq<dyn Any> {
     ) -> Result<Arc<dyn PhysicalExpr>>;
 }
 
+/// a Trait for marking tree node types that are rewritable
+pub trait TreeNodeRewritable: Clone {
+    /// Transform the tree node using the given [TreeNodeRewriter]
+    /// It performs a depth-first walk of a node and its children.
+    ///
+    /// For a node tree such as
+    /// ```text
+    /// ParentNode
+    ///    left: ChildNode1
+    ///    right: ChildNode2
+    /// ```
+    ///
+    /// The nodes are visited using the following order
+    /// ```text
+    /// pre_visit(ParentNode)
+    /// pre_visit(ChildNode1)
+    /// mutate(ChildNode1)
+    /// pre_visit(ChildNode2)
+    /// mutate(ChildNode2)
+    /// mutate(ParentNode)
+    /// ```
+    ///
+    /// If an Err result is returned, recursion is stopped immediately
+    ///
+    /// If [`RewriteRecursion::Stop`] is returned on a call to pre_visit, no
+    /// children of that node are visited, nor is mutate
+    /// called on that node
+    ///
+    fn transform_using<R: TreeNodeRewriter<Self>>(
+        self,
+        rewriter: &mut R,
+    ) -> Result<Self> {
+        let need_mutate = match rewriter.pre_visit(&self)? {
+            RewriteRecursion::Mutate => return rewriter.mutate(self),
+            RewriteRecursion::Stop => return Ok(self),
+            RewriteRecursion::Continue => true,
+            RewriteRecursion::Skip => false,
+        };
+
+        let after_op_children =
+            self.map_children(|node| node.transform_using(rewriter))?;
+
+        // now rewrite this node itself
+        if need_mutate {
+            rewriter.mutate(after_op_children)
+        } else {
+            Ok(after_op_children)
+        }
+    }
+
+    /// Convenience utils for writing optimizers rule: recursively apply the given `op` to the node tree.
+    /// When `op` does not apply to a given node, it is left unchanged.
+    /// The default tree traversal direction is transform_up(Postorder Traversal).
+    fn transform<F>(self, op: &F) -> Result<Self>
+    where
+        F: Fn(Self) -> Option<Self>,
+    {
+        self.transform_up(op)
+    }
+
+    /// Convenience utils for writing optimizers rule: recursively apply the given 'op' to the node and all of its
+    /// children(Preorder Traversal).
+    /// When the `op` does not apply to a given node, it is left unchanged.
+    fn transform_down<F>(self, op: &F) -> Result<Self>
+    where
+        F: Fn(Self) -> Option<Self>,
+    {
+        let node_cloned = self.clone();
+        let after_op = match op(node_cloned) {
+            Some(value) => value,
+            None => self,
+        };
+        after_op.map_children(|node| node.transform_down(op))
+    }
+
+    /// Convenience utils for writing optimizers rule: recursively apply the given 'op' first to all of its
+    /// children and then itself(Postorder Traversal).
+    /// When the `op` does not apply to a given node, it is left unchanged.
+    fn transform_up<F>(self, op: &F) -> Result<Self>
+    where
+        F: Fn(Self) -> Option<Self>,
+    {
+        let after_op_children = self.map_children(|node| node.transform_up(op))?;
+
+        let after_op_children_clone = after_op_children.clone();
+        let new_node = match op(after_op_children) {
+            Some(value) => value,
+            None => after_op_children_clone,
+        };
+        Ok(new_node)
+    }
+
+    /// Apply transform `F` to the node's children, the transform `F` might have a direction(Preorder or Postorder)
+    fn map_children<F>(self, transform: F) -> Result<Self>
+    where
+        F: FnMut(Self) -> Result<Self>;
+}
+
+/// Trait for potentially recursively transforming a [`TreeNodeRewritable`] node
+/// tree. When passed to `TreeNodeRewritable::transform_using`, `TreeNodeRewriter::mutate` is
+/// invoked recursively on all nodes of a tree.
+pub trait TreeNodeRewriter<N: TreeNodeRewritable>: Sized {
+    /// Invoked before (Preorder) any children of `node` are rewritten /
+    /// visited. Default implementation returns `Ok(RewriteRecursion::Continue)`
+    fn pre_visit(&mut self, _node: &N) -> Result<RewriteRecursion> {
+        Ok(RewriteRecursion::Continue)
+    }
+
+    /// Invoked after (Postorder) all children of `node` have been mutated and
+    /// returns a potentially modified node.
+    fn mutate(&mut self, node: N) -> Result<N>;
+}
+
+/// Controls how the [TreeNodeRewriter] recursion should proceed.
+#[allow(dead_code)]
+pub enum RewriteRecursion {
+    /// Continue rewrite / visit this node tree.
+    Continue,
+    /// Call `mutate` on this node immediately and return, without visiting its children.
+    Mutate,
+    /// Do not rewrite / visit this node or its children.
+    Stop,
+    /// Keep recursing into the children but skip applying `mutate` on this node.
+    Skip,
+}
+
+impl TreeNodeRewritable for Arc<dyn PhysicalExpr> {
+    fn map_children<F>(self, transform: F) -> Result<Self>
+    where
+        F: FnMut(Self) -> Result<Self>,
+    {
+        if !self.children().is_empty() {
+            let new_children: Result<Vec<_>> =
+                self.children().into_iter().map(transform).collect();
+            with_new_children_if_necessary(self, new_children?)
+        } else {
+            Ok(self)
+        }
+    }
+}
+
+/// Returns a copy of this expr if we change any child according to the pointer comparison.
+/// The size of `children` must be equal to the size of `PhysicalExpr::children()`.
+/// Allow the vtable address comparisons for PhysicalExpr Trait Objects; it is harmless even
+/// in the case of 'false-negative'.
+#[allow(clippy::vtable_address_comparisons)]
+pub fn with_new_children_if_necessary(
+    expr: Arc<dyn PhysicalExpr>,
+    children: Vec<Arc<dyn PhysicalExpr>>,
+) -> Result<Arc<dyn PhysicalExpr>> {
+    if children.len() != expr.children().len() {
+        Err(DataFusionError::Internal(
+            "PhysicalExpr: Wrong number of children".to_string(),
+        ))
+    } else if children.is_empty()
+        || children
+            .iter()
+            .zip(expr.children().iter())
+            .any(|(c1, c2)| !Arc::ptr_eq(c1, c2))
+    {
+        expr.with_new_children(children)
+    } else {
+        Ok(expr)
+    }
+}
+
 /// Scatter `truthy` array by boolean mask. When the mask evaluates `true`, next values of `truthy`
 /// are taken, when the mask evaluates `false` values null values are filled.
 ///
@@ -281,32 +446,35 @@ pub fn normalize_out_expr_with_alias_schema(
     alias_map: &HashMap<Column, Vec<Column>>,
     schema: &SchemaRef,
 ) -> Arc<dyn PhysicalExpr> {
-    transform(expr.clone(), &|expr| {
-        let normalized_form: Option<Arc<dyn PhysicalExpr>> =
-            match expr.as_any().downcast_ref::<Column>() {
-                Some(column) => {
-                    let out = alias_map
-                        .get(column)
-                        .map(|c| {
-                            let out_col: Arc<dyn PhysicalExpr> = Arc::new(c[0].clone());
-                            out_col
-                        })
-                        .or_else(|| match schema.index_of(column.name()) {
-                            // Exactly matching, return None, no need to do the transform
-                            Ok(idx) if column.index() == idx => None,
-                            _ => {
+    let expr_clone = expr.clone();
+    expr_clone
+        .transform(&|expr| {
+            let normalized_form: Option<Arc<dyn PhysicalExpr>> =
+                match expr.as_any().downcast_ref::<Column>() {
+                    Some(column) => {
+                        let out = alias_map
+                            .get(column)
+                            .map(|c| {
                                 let out_col: Arc<dyn PhysicalExpr> =
-                                    Arc::new(Column::new(column.name(), usize::MAX));
-                                Some(out_col)
-                            }
-                        });
-                    out
-                }
-                None => None,
-            };
-        normalized_form
-    })
-    .unwrap_or(expr)
+                                    Arc::new(c[0].clone());
+                                out_col
+                            })
+                            .or_else(|| match schema.index_of(column.name()) {
+                                // Exactly matching, return None, no need to do the transform
+                                Ok(idx) if column.index() == idx => None,
+                                _ => {
+                                    let out_col: Arc<dyn PhysicalExpr> =
+                                        Arc::new(Column::new(column.name(), usize::MAX));
+                                    Some(out_col)
+                                }
+                            });
+                        out
+                    }
+                    None => None,
+                };
+            normalized_form
+        })
+        .unwrap_or(expr)
 }
 
 pub fn normalize_expr_with_equivalence_properties(
diff --git a/datafusion/physical-expr/src/utils.rs b/datafusion/physical-expr/src/utils.rs
deleted file mode 100644
index 2c9832adb1521..0000000000000
--- a/datafusion/physical-expr/src/utils.rs
+++ /dev/null
@@ -1,117 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Collection of utility functions for Physical Expr optimization
-
-use crate::PhysicalExpr;
-use datafusion_common::DataFusionError;
-use std::result;
-use std::sync::Arc;
-
-pub type Result<T> = result::Result<T, DataFusionError>;
-
-/// Apply transform `F` to the PhysicalExpr's children, the transform `F` might have a direction(Preorder or Postorder)
-fn map_children<F>(
-    expr: Arc<dyn PhysicalExpr>,
-    transform: F,
-) -> Result<Arc<dyn PhysicalExpr>>
-where
-    F: Fn(Arc<dyn PhysicalExpr>) -> Result<Arc<dyn PhysicalExpr>>,
-{
-    if !expr.children().is_empty() {
-        let new_children: Result<Vec<_>> =
-            expr.children().into_iter().map(transform).collect();
-        with_new_children_if_necessary(expr, new_children?)
-    } else {
-        Ok(expr)
-    }
-}
-
-/// Convenience utils for writing optimizers rule: recursively apply the given `op` to the PhysicalExpr tree.
-/// When `op` does not apply to a given expr, it is left unchanged.
-/// The default tree traversal direction is transform_down(Preorder Traversal).
-pub fn transform<F>(expr: Arc<dyn PhysicalExpr>, op: &F) -> Result<Arc<dyn PhysicalExpr>>
-where
-    F: Fn(Arc<dyn PhysicalExpr>) -> Option<Arc<dyn PhysicalExpr>>,
-{
-    transform_down(expr, op)
-}
-
-/// Convenience utils for writing optimizers rule: recursively apply the given 'op' to the PhysicalExpr and all of its
-/// children(Preorder Traversal). When the `op` does not apply to a given PhysicalExpr, it is left unchanged.
-pub fn transform_down<F>(
-    expr: Arc<dyn PhysicalExpr>,
-    op: &F,
-) -> Result<Arc<dyn PhysicalExpr>>
-where
-    F: Fn(Arc<dyn PhysicalExpr>) -> Option<Arc<dyn PhysicalExpr>>,
-{
-    let expr_cloned = expr.clone();
-    let after_op = match op(expr_cloned) {
-        Some(value) => value,
-        None => expr,
-    };
-    map_children(after_op.clone(), |expr: Arc<dyn PhysicalExpr>| {
-        transform_down(expr, op)
-    })
-}
-
-/// Convenience utils for writing optimizers rule: recursively apply the given 'op' first to all of its
-/// children and then itself(Postorder Traversal). When the `op` does not apply to a given PhysicalExpr, it is left unchanged.
-#[allow(dead_code)]
-pub fn transform_up<F>(
-    expr: Arc<dyn PhysicalExpr>,
-    op: &F,
-) -> Result<Arc<dyn PhysicalExpr>>
-where
-    F: Fn(Arc<dyn PhysicalExpr>) -> Option<Arc<dyn PhysicalExpr>>,
-{
-    let after_op_children =
-        map_children(expr, |expr: Arc<dyn PhysicalExpr>| transform_up(expr, op))?;
-
-    let after_op_children_clone = after_op_children.clone();
-    let new_expr = match op(after_op_children) {
-        Some(value) => value,
-        None => after_op_children_clone,
-    };
-    Ok(new_expr)
-}
-
-/// Returns a copy of this expr if we change any child according to the pointer comparison.
-/// The size of `children` must be equal to the size of `PhysicalExpr::children()`.
-/// Allow the vtable address comparisons for PhysicalExpr Trait Objects，it is harmless even
-/// in the case of 'false-native'.
-#[allow(clippy::vtable_address_comparisons)]
-pub fn with_new_children_if_necessary(
-    expr: Arc<dyn PhysicalExpr>,
-    children: Vec<Arc<dyn PhysicalExpr>>,
-) -> Result<Arc<dyn PhysicalExpr>> {
-    if children.len() != expr.children().len() {
-        Err(DataFusionError::Internal(
-            "PhysicalExpr: Wrong number of children".to_string(),
-        ))
-    } else if children.is_empty()
-        || children
-            .iter()
-            .zip(expr.children().iter())
-            .any(|(c1, c2)| !Arc::ptr_eq(c1, c2))
-    {
-        expr.with_new_children(children)
-    } else {
-        Ok(expr)
-    }
-}

From 8e7aec52f39b84afce1892bc1568124f0392bfc7 Mon Sep 17 00:00:00 2001
From: Wang <mingmwang@ebay.com>
Date: Thu, 20 Oct 2022 18:31:10 +0800
Subject: [PATCH 11/12] merge with upstream

---
 datafusion/core/src/physical_optimizer/enforcement.rs | 1 +
 datafusion/core/src/physical_plan/mod.rs              | 2 +-
 datafusion/physical-expr/src/physical_expr.rs         | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/datafusion/core/src/physical_optimizer/enforcement.rs b/datafusion/core/src/physical_optimizer/enforcement.rs
index 17b081cc74416..29b89eeb6ec2b 100644
--- a/datafusion/core/src/physical_optimizer/enforcement.rs
+++ b/datafusion/core/src/physical_optimizer/enforcement.rs
@@ -241,6 +241,7 @@ mod tests {
                 projection: None,
                 limit: None,
                 table_partition_cols: vec![],
+                config_options: ConfigOptions::new().into_shareable(),
             },
             None,
             None,
diff --git a/datafusion/core/src/physical_plan/mod.rs b/datafusion/core/src/physical_plan/mod.rs
index bc34f8bf1cdeb..6505c94914fc4 100644
--- a/datafusion/core/src/physical_plan/mod.rs
+++ b/datafusion/core/src/physical_plan/mod.rs
@@ -610,7 +610,7 @@ pub trait TreeNodeRewriter<N: TreeNodeRewritable>: Sized {
     }
 
     /// Invoked after (Postorder) all children of `node` have been mutated and
-    /// returns a potentially modified ExecutionPlan.
+    /// returns a potentially modified node.
     fn mutate(&mut self, node: N) -> Result<N>;
 }
 
diff --git a/datafusion/physical-expr/src/physical_expr.rs b/datafusion/physical-expr/src/physical_expr.rs
index e3141dedf8ba5..0006d926072ce 100644
--- a/datafusion/physical-expr/src/physical_expr.rs
+++ b/datafusion/physical-expr/src/physical_expr.rs
@@ -185,7 +185,7 @@ pub trait TreeNodeRewriter<N: TreeNodeRewritable>: Sized {
     }
 
     /// Invoked after (Postorder) all children of `node` have been mutated and
-    /// returns a potentially modified ExecutionPlan.
+    /// returns a potentially modified node.
     fn mutate(&mut self, node: N) -> Result<N>;
 }
 

From 835376c017031231f4a9014a71728b5fbd42e10c Mon Sep 17 00:00:00 2001
From: Wang <mingmwang@ebay.com>
Date: Wed, 26 Oct 2022 13:46:40 +0800
Subject: [PATCH 12/12] Join Keys Reordering before Enforcement

---
 datafusion/core/src/dataframe.rs              |   53 +-
 datafusion/core/src/execution/context.rs      |    3 +
 .../src/physical_optimizer/enforcement.rs     | 1046 ++++++++++++++---
 .../core/src/physical_plan/aggregates/mod.rs  |   10 +-
 .../core/src/physical_plan/hash_join.rs       |   74 +-
 datafusion/core/src/physical_plan/mod.rs      |   28 +-
 .../core/src/physical_plan/projection.rs      |    8 +-
 .../core/src/physical_plan/sort_merge_join.rs |  121 +-
 .../physical_plan/windows/window_agg_exec.rs  |   10 +-
 .../physical-expr/src/expressions/mod.rs      |    1 +
 .../physical-expr/src/expressions/no_op.rs    |    2 +-
 datafusion/physical-expr/src/lib.rs           |    2 +
 datafusion/physical-expr/src/physical_expr.rs |   16 +-
 13 files changed, 1183 insertions(+), 191 deletions(-)

diff --git a/datafusion/core/src/dataframe.rs b/datafusion/core/src/dataframe.rs
index b41f3d29607d6..54b45123bf163 100644
--- a/datafusion/core/src/dataframe.rs
+++ b/datafusion/core/src/dataframe.rs
@@ -1498,10 +1498,59 @@ mod tests {
         assert_eq!(4016, union_rows.iter().map(|x| x.num_rows()).sum::<usize>());
 
         let physical_plan = union.create_physical_plan().await?;
-        let partition_count = SessionContext::new().copied_config().target_partitions;
+        let default_partition_count =
+            SessionContext::new().copied_config().target_partitions;
         assert_eq!(
             physical_plan.output_partitioning().partition_count(),
-            partition_count
+            default_partition_count
+        );
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn non_partition_aware_union() -> Result<()> {
+        let left = test_table().await?.select_columns(&["c1", "c2"])?;
+        let right = test_table_with_name("c2")
+            .await?
+            .select_columns(&["c1", "c2"])?
+            .with_column_renamed("c2.c1", "c2_c1")?
+            .with_column_renamed("c2.c2", "c2_c2")?;
+
+        let left_rows = left.collect().await?;
+        let right_rows = right.collect().await?;
+        let join1 = left.join(
+            right.clone(),
+            JoinType::Inner,
+            &["c1", "c2"],
+            &["c2_c1", "c2_c2"],
+            None,
+        )?;
+
+        // join key ordering is different
+        let join2 = left.join(
+            right,
+            JoinType::Inner,
+            &["c2", "c1"],
+            &["c2_c2", "c2_c1"],
+            None,
+        )?;
+
+        let union = join1.union(join2)?;
+
+        let union_rows = union.collect().await?;
+
+        assert_eq!(100, left_rows.iter().map(|x| x.num_rows()).sum::<usize>());
+        assert_eq!(100, right_rows.iter().map(|x| x.num_rows()).sum::<usize>());
+        assert_eq!(916, union_rows.iter().map(|x| x.num_rows()).sum::<usize>());
+
+        let physical_plan = union.create_physical_plan().await?;
+        let default_partition_count =
+            SessionContext::new().copied_config().target_partitions;
+
+        // the union's output partition count should be the sum of its inputs' partition counts
+        assert_eq!(
+            physical_plan.output_partitioning().partition_count(),
+            default_partition_count * 2
         );
         Ok(())
     }
diff --git a/datafusion/core/src/execution/context.rs b/datafusion/core/src/execution/context.rs
index 2b1e5b47028f8..4dc9ca08cd4bf 100644
--- a/datafusion/core/src/execution/context.rs
+++ b/datafusion/core/src/execution/context.rs
@@ -1146,6 +1146,8 @@ pub struct SessionConfig {
     pub parquet_pruning: bool,
     /// Should DataFusion collect statistics after listing files
     pub collect_statistics: bool,
+    /// Should DataFusion optimizer run a top down process to reorder the join keys
+    pub top_down_join_key_reordering: bool,
     /// Configuration options
     pub config_options: Arc<RwLock<ConfigOptions>>,
     /// Opaque extensions.
@@ -1165,6 +1167,7 @@ impl Default for SessionConfig {
             repartition_windows: true,
             parquet_pruning: true,
             collect_statistics: false,
+            top_down_join_key_reordering: true,
             config_options: Arc::new(RwLock::new(ConfigOptions::new())),
             // Assume no extensions by default.
             extensions: HashMap::with_capacity_and_hasher(
diff --git a/datafusion/core/src/physical_optimizer/enforcement.rs b/datafusion/core/src/physical_optimizer/enforcement.rs
index 29b89eeb6ec2b..b0db616bf2624 100644
--- a/datafusion/core/src/physical_optimizer/enforcement.rs
+++ b/datafusion/core/src/physical_optimizer/enforcement.rs
@@ -20,16 +20,24 @@
 //!
 use crate::error::Result;
 use crate::physical_optimizer::PhysicalOptimizerRule;
+use crate::physical_plan::aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy};
 use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec;
+use crate::physical_plan::hash_join::{HashJoinExec, PartitionMode};
+use crate::physical_plan::projection::ProjectionExec;
 use crate::physical_plan::repartition::RepartitionExec;
+use crate::physical_plan::sort_merge_join::SortMergeJoinExec;
 use crate::physical_plan::sorts::sort::SortExec;
-use crate::physical_plan::TreeNodeRewritable;
+use crate::physical_plan::windows::WindowAggExec;
 use crate::physical_plan::{with_new_children_if_necessary, Distribution, ExecutionPlan};
+use crate::physical_plan::{Partitioning, TreeNodeRewritable};
 use crate::prelude::SessionConfig;
 use datafusion_physical_expr::expressions::Column;
+use datafusion_physical_expr::expressions::NoOp;
 use datafusion_physical_expr::{
-    normalize_sort_expr_with_equivalence_properties, PhysicalSortExpr,
+    expr_list_eq_strict_order, normalize_expr_with_equivalence_properties,
+    normalize_sort_expr_with_equivalence_properties, PhysicalExpr, PhysicalSortExpr,
 };
+use std::collections::HashMap;
 use std::sync::Arc;
 
 /// BasicEnforcement rule, it ensures the Distribution and Ordering requirements are met
@@ -56,10 +64,27 @@ impl PhysicalOptimizerRule for BasicEnforcement {
         plan: Arc<dyn ExecutionPlan>,
         config: &SessionConfig,
     ) -> Result<Arc<dyn ExecutionPlan>> {
-        // Distribution and Ordering enforcement need to be applied bottom-up.
         let target_partitions = config.target_partitions;
-        plan.transform_up(&{
-            |plan| Some(ensure_distribution_and_ordering(plan, target_partitions))
+        let top_down_join_key_reordering = config.top_down_join_key_reordering;
+        let new_plan = if top_down_join_key_reordering {
+            // Run a top-down process to adjust input key ordering recursively
+            adjust_input_keys_down_recursively(plan, vec![])?
+        } else {
+            plan
+        };
+        // Distribution and Ordering enforcement need to be applied bottom-up.
+        new_plan.transform_up(&{
+            |plan| {
+                let adjusted = if !top_down_join_key_reordering {
+                    reorder_join_keys_to_inputs(plan)
+                } else {
+                    plan
+                };
+                Some(ensure_distribution_and_ordering(
+                    adjusted,
+                    target_partitions,
+                ))
+            }
         })
     }
 
@@ -68,6 +93,598 @@ impl PhysicalOptimizerRule for BasicEnforcement {
     }
 }
 
+/// When the physical planner creates the Joins, the ordering of join keys is from the original query.
+/// That might not match with the output partitioning of the join node's children.
+/// This method runs a top-down process and tries to adjust the output partitioning of the children
+/// if the children themselves are joins or aggregations.
+fn adjust_input_keys_down_recursively(
+    plan: Arc<dyn crate::physical_plan::ExecutionPlan>,
+    parent_required: Vec<Arc<dyn PhysicalExpr>>,
+) -> Result<Arc<dyn crate::physical_plan::ExecutionPlan>> {
+    let plan_any = plan.as_any();
+    if let Some(HashJoinExec {
+        left,
+        right,
+        on,
+        filter,
+        join_type,
+        mode,
+        null_equals_null,
+        ..
+    }) = plan_any.downcast_ref::<HashJoinExec>()
+    {
+        match mode {
+            PartitionMode::Partitioned => {
+                let join_key_pairs = extract_join_keys(on);
+                if let Some((
+                    JoinKeyPairs {
+                        left_keys,
+                        right_keys,
+                    },
+                    new_positions,
+                )) = try_reorder(
+                    join_key_pairs.clone(),
+                    parent_required,
+                    &plan.equivalence_properties(),
+                ) {
+                    let new_join_on = if !new_positions.is_empty() {
+                        new_join_conditions(&left_keys, &right_keys)
+                    } else {
+                        on.clone()
+                    };
+                    let new_left =
+                        adjust_input_keys_down_recursively(left.clone(), left_keys)?;
+                    let new_right =
+                        adjust_input_keys_down_recursively(right.clone(), right_keys)?;
+                    Ok(Arc::new(HashJoinExec::try_new(
+                        new_left,
+                        new_right,
+                        new_join_on,
+                        filter.clone(),
+                        join_type,
+                        PartitionMode::Partitioned,
+                        null_equals_null,
+                    )?))
+                } else {
+                    let new_left = adjust_input_keys_down_recursively(
+                        left.clone(),
+                        join_key_pairs.left_keys,
+                    )?;
+                    let new_right = adjust_input_keys_down_recursively(
+                        right.clone(),
+                        join_key_pairs.right_keys,
+                    )?;
+                    Ok(Arc::new(HashJoinExec::try_new(
+                        new_left,
+                        new_right,
+                        on.clone(),
+                        filter.clone(),
+                        join_type,
+                        PartitionMode::Partitioned,
+                        null_equals_null,
+                    )?))
+                }
+            }
+            PartitionMode::CollectLeft => {
+                let new_right =
+                    adjust_input_keys_down_recursively(right.clone(), parent_required)?;
+                Ok(Arc::new(HashJoinExec::try_new(
+                    left.clone(),
+                    new_right,
+                    on.clone(),
+                    filter.clone(),
+                    join_type,
+                    PartitionMode::CollectLeft,
+                    null_equals_null,
+                )?))
+            }
+        }
+    } else if let Some(SortMergeJoinExec {
+        left,
+        right,
+        on,
+        join_type,
+        sort_options,
+        null_equals_null,
+        ..
+    }) = plan_any.downcast_ref::<SortMergeJoinExec>()
+    {
+        let join_key_pairs = extract_join_keys(on);
+        if let Some((
+            JoinKeyPairs {
+                left_keys,
+                right_keys,
+            },
+            new_positions,
+        )) = try_reorder(
+            join_key_pairs.clone(),
+            parent_required,
+            &plan.equivalence_properties(),
+        ) {
+            let new_join_on = if !new_positions.is_empty() {
+                new_join_conditions(&left_keys, &right_keys)
+            } else {
+                on.clone()
+            };
+            let new_options = if !new_positions.is_empty() {
+                let mut new_sort_options = vec![];
+                for idx in 0..sort_options.len() {
+                    new_sort_options.push(sort_options[new_positions[idx]])
+                }
+                new_sort_options
+            } else {
+                sort_options.clone()
+            };
+
+            let new_left = adjust_input_keys_down_recursively(left.clone(), left_keys)?;
+            let new_right =
+                adjust_input_keys_down_recursively(right.clone(), right_keys)?;
+
+            Ok(Arc::new(SortMergeJoinExec::try_new(
+                new_left,
+                new_right,
+                new_join_on,
+                *join_type,
+                new_options,
+                *null_equals_null,
+            )?))
+        } else {
+            let new_left = adjust_input_keys_down_recursively(
+                left.clone(),
+                join_key_pairs.left_keys,
+            )?;
+            let new_right = adjust_input_keys_down_recursively(
+                right.clone(),
+                join_key_pairs.right_keys,
+            )?;
+            Ok(Arc::new(SortMergeJoinExec::try_new(
+                new_left,
+                new_right,
+                on.clone(),
+                *join_type,
+                sort_options.clone(),
+                *null_equals_null,
+            )?))
+        }
+    } else if let Some(AggregateExec {
+        mode,
+        group_by,
+        aggr_expr,
+        input,
+        input_schema,
+        ..
+    }) = plan_any.downcast_ref::<AggregateExec>()
+    {
+        if parent_required.is_empty() {
+            Ok(plan)
+        } else {
+            match mode {
+                AggregateMode::FinalPartitioned | AggregateMode::Partial => {
+                    let out_put_columns = group_by
+                        .expr()
+                        .iter()
+                        .enumerate()
+                        .map(|(index, (_col, name))| Column::new(name, index))
+                        .collect::<Vec<_>>();
+
+                    let out_put_exprs = out_put_columns
+                        .iter()
+                        .map(|c| Arc::new(c.clone()) as Arc<dyn PhysicalExpr>)
+                        .collect::<Vec<_>>();
+
+                    // Check whether the requirements can be satisfied by the Aggregation
+                    if parent_required.len() != out_put_exprs.len()
+                        || expr_list_eq_strict_order(&out_put_exprs, &parent_required)
+                        || !group_by.null_expr().is_empty()
+                    {
+                        Ok(plan)
+                    } else {
+                        let new_positions =
+                            expected_expr_positions(&out_put_exprs, &parent_required);
+                        match new_positions {
+                            Some(positions) => {
+                                let mut new_group_exprs = vec![];
+                                for idx in positions.into_iter() {
+                                    new_group_exprs.push(group_by.expr()[idx].clone());
+                                }
+                                let new_group_by =
+                                    PhysicalGroupBy::new_single(new_group_exprs);
+                                match mode {
+                                    AggregateMode::FinalPartitioned => {
+                                        let new_input =
+                                            adjust_input_keys_down_recursively(
+                                                input.clone(),
+                                                parent_required,
+                                            )?;
+                                        let new_agg = Arc::new(AggregateExec::try_new(
+                                            AggregateMode::FinalPartitioned,
+                                            new_group_by,
+                                            aggr_expr.clone(),
+                                            new_input,
+                                            input_schema.clone(),
+                                        )?);
+
+                                        // Need to create a new projection to change the expr ordering back
+                                        let mut proj_exprs = out_put_columns
+                                            .iter()
+                                            .map(|col| {
+                                                (
+                                                    Arc::new(Column::new(
+                                                        col.name(),
+                                                        new_agg
+                                                            .schema()
+                                                            .index_of(col.name())
+                                                            .unwrap(),
+                                                    ))
+                                                        as Arc<dyn PhysicalExpr>,
+                                                    col.name().to_owned(),
+                                                )
+                                            })
+                                            .collect::<Vec<_>>();
+                                        let agg_schema = new_agg.schema();
+                                        let agg_fields = agg_schema.fields();
+                                        for (idx, field) in agg_fields
+                                            .iter()
+                                            .enumerate()
+                                            .skip(out_put_columns.len())
+                                        {
+                                            proj_exprs.push((
+                                                Arc::new(Column::new(
+                                                    field.name().as_str(),
+                                                    idx,
+                                                ))
+                                                    as Arc<dyn PhysicalExpr>,
+                                                field.name().clone(),
+                                            ))
+                                        }
+                                        // TODO merge adjacent Projections if there are any
+                                        Ok(Arc::new(ProjectionExec::try_new(
+                                            proj_exprs, new_agg,
+                                        )?))
+                                    }
+                                    AggregateMode::Partial => {
+                                        Ok(Arc::new(AggregateExec::try_new(
+                                            AggregateMode::Partial,
+                                            new_group_by,
+                                            aggr_expr.clone(),
+                                            input.clone(),
+                                            input_schema.clone(),
+                                        )?))
+                                    }
+                                    _ => Ok(plan),
+                                }
+                            }
+                            _ => Ok(plan),
+                        }
+                    }
+                }
+                _ => Ok(plan),
+            }
+        }
+    } else if let Some(ProjectionExec { expr, .. }) =
+        plan_any.downcast_ref::<ProjectionExec>()
+    {
+        // For a Projection, map the required columns back to the input columns they alias,
+        // and then push the rewritten requirements down to the child.
+        let mut column_mapping = HashMap::new();
+        for (expression, name) in expr.iter() {
+            if let Some(column) = expression.as_any().downcast_ref::<Column>() {
+                column_mapping.insert(name.clone(), column.clone());
+            };
+        }
+        let new_required: Vec<Arc<dyn PhysicalExpr>> = parent_required
+            .iter()
+            .filter_map(|r| {
+                if let Some(column) = r.as_any().downcast_ref::<Column>() {
+                    column_mapping.get(column.name())
+                } else {
+                    None
+                }
+            })
+            .map(|e| Arc::new(e.clone()) as Arc<dyn PhysicalExpr>)
+            .collect::<Vec<_>>();
+        if new_required.len() == parent_required.len() {
+            plan.map_children(|plan| {
+                adjust_input_keys_down_recursively(plan, new_required.clone())
+            })
+        } else {
+            Ok(plan)
+        }
+    } else if let Some(WindowAggExec { input: _, .. }) =
+        plan_any.downcast_ref::<WindowAggExec>()
+    {
+        // TODO
+        Ok(plan)
+    } else if parent_required.is_empty() {
+        Ok(plan)
+    } else {
+        plan.map_children(|plan| {
+            adjust_input_keys_down_recursively(plan, parent_required.clone())
+        })
+    }
+}
+
+/// When the physical planner creates the Joins, the ordering of join keys is from the original query.
+/// That might not match with the output partitioning of the join node's children.
+/// This method will try to change the ordering of the join keys to match with the
+/// partitioning of the join node's children.
+/// If it cannot match both sides, it will try to match one, either the left side or the right side.
+fn reorder_join_keys_to_inputs(
+    plan: Arc<dyn crate::physical_plan::ExecutionPlan>,
+) -> Arc<dyn crate::physical_plan::ExecutionPlan> {
+    let plan_any = plan.as_any();
+    if let Some(HashJoinExec {
+        left,
+        right,
+        on,
+        filter,
+        join_type,
+        mode,
+        null_equals_null,
+        ..
+    }) = plan_any.downcast_ref::<HashJoinExec>()
+    {
+        match mode {
+            PartitionMode::Partitioned => {
+                let join_key_pairs = extract_join_keys(on);
+                if let Some((
+                    JoinKeyPairs {
+                        left_keys,
+                        right_keys,
+                    },
+                    new_positions,
+                )) = reorder_current_join_keys(
+                    join_key_pairs,
+                    Some(left.output_partitioning()),
+                    Some(right.output_partitioning()),
+                    &plan.equivalence_properties(),
+                ) {
+                    if !new_positions.is_empty() {
+                        let new_join_on = new_join_conditions(&left_keys, &right_keys);
+                        Arc::new(
+                            HashJoinExec::try_new(
+                                left.clone(),
+                                right.clone(),
+                                new_join_on,
+                                filter.clone(),
+                                join_type,
+                                PartitionMode::Partitioned,
+                                null_equals_null,
+                            )
+                            .unwrap(),
+                        )
+                    } else {
+                        plan
+                    }
+                } else {
+                    plan
+                }
+            }
+            _ => plan,
+        }
+    } else if let Some(SortMergeJoinExec {
+        left,
+        right,
+        on,
+        join_type,
+        sort_options,
+        null_equals_null,
+        ..
+    }) = plan_any.downcast_ref::<SortMergeJoinExec>()
+    {
+        let join_key_pairs = extract_join_keys(on);
+        if let Some((
+            JoinKeyPairs {
+                left_keys,
+                right_keys,
+            },
+            new_positions,
+        )) = reorder_current_join_keys(
+            join_key_pairs,
+            Some(left.output_partitioning()),
+            Some(right.output_partitioning()),
+            &plan.equivalence_properties(),
+        ) {
+            if !new_positions.is_empty() {
+                let new_join_on = new_join_conditions(&left_keys, &right_keys);
+                let mut new_sort_options = vec![];
+                for idx in 0..sort_options.len() {
+                    new_sort_options.push(sort_options[new_positions[idx]])
+                }
+                Arc::new(
+                    SortMergeJoinExec::try_new(
+                        left.clone(),
+                        right.clone(),
+                        new_join_on,
+                        *join_type,
+                        new_sort_options,
+                        *null_equals_null,
+                    )
+                    .unwrap(),
+                )
+            } else {
+                plan
+            }
+        } else {
+            plan
+        }
+    } else {
+        plan
+    }
+}
+
+/// Reorders the current join keys based on the left input's hash partitioning, falling back to the right input's.
+fn reorder_current_join_keys(
+    join_keys: JoinKeyPairs,
+    left_partition: Option<Partitioning>,
+    right_partition: Option<Partitioning>,
+    equivalence_properties: &[Vec<Column>],
+) -> Option<(JoinKeyPairs, Vec<usize>)> {
+    match (left_partition.clone(), right_partition.clone()) {
+        (Some(Partitioning::Hash(left_exprs, _)), _) => {
+            try_reorder(join_keys.clone(), left_exprs, equivalence_properties).or_else(
+                || {
+                    reorder_current_join_keys(
+                        join_keys,
+                        None,
+                        right_partition,
+                        equivalence_properties,
+                    )
+                },
+            )
+        }
+        (_, Some(Partitioning::Hash(right_exprs, _))) => {
+            try_reorder(join_keys.clone(), right_exprs, equivalence_properties).or_else(
+                || {
+                    reorder_current_join_keys(
+                        join_keys,
+                        left_partition,
+                        None,
+                        equivalence_properties,
+                    )
+                },
+            )
+        }
+        _ => None,
+    }
+}
+/// Try to align the left join keys with `expected`, directly or after equivalence normalization; `vec![]` means already aligned.
+fn try_reorder(
+    join_keys: JoinKeyPairs,
+    expected: Vec<Arc<dyn PhysicalExpr>>,
+    equivalence_properties: &[Vec<Column>],
+) -> Option<(JoinKeyPairs, Vec<usize>)> {
+    if join_keys.left_keys.len() != expected.len() {
+        return None;
+    }
+    if expr_list_eq_strict_order(&expected, &join_keys.left_keys) {
+        return Some((join_keys, vec![]));
+    }
+    let new_positions = expected_expr_positions(&join_keys.left_keys, &expected);
+    match new_positions {
+        Some(positions) => {
+            let mut new_right_keys = vec![];
+            for pos in positions.iter() {
+                new_right_keys.push(join_keys.right_keys[*pos].clone());
+            }
+            Some((
+                JoinKeyPairs {
+                    left_keys: expected,
+                    right_keys: new_right_keys,
+                },
+                positions,
+            ))
+        }
+        None => {
+            if !equivalence_properties.is_empty() {
+                let normalized_expected = expected
+                    .iter()
+                    .map(|e| {
+                        normalize_expr_with_equivalence_properties(
+                            e.clone(),
+                            equivalence_properties,
+                        )
+                    })
+                    .collect::<Vec<_>>();
+                let normalized_left_keys = join_keys
+                    .left_keys
+                    .iter()
+                    .map(|e| {
+                        normalize_expr_with_equivalence_properties(
+                            e.clone(),
+                            equivalence_properties,
+                        )
+                    })
+                    .collect::<Vec<_>>();
+                if expr_list_eq_strict_order(&normalized_expected, &normalized_left_keys)
+                {
+                    Some((join_keys, vec![]))
+                } else {
+                    let new_positions = expected_expr_positions(
+                        &normalized_left_keys,
+                        &normalized_expected,
+                    );
+                    match new_positions {
+                        Some(positions) => {
+                            let mut new_left_keys = vec![];
+                            let mut new_right_keys = vec![];
+                            for pos in positions.iter() {
+                                new_left_keys.push(join_keys.left_keys[*pos].clone());
+                                new_right_keys.push(join_keys.right_keys[*pos].clone());
+                            }
+                            Some((
+                                JoinKeyPairs {
+                                    left_keys: new_left_keys,
+                                    right_keys: new_right_keys,
+                                },
+                                positions,
+                            ))
+                        }
+                        None => None,
+                    }
+                }
+            } else {
+                None
+            }
+        }
+    }
+}
+
+/// Return, for each expression in `expected`, its position in `current` (`None` if any is missing).
+/// For example, the current expressions are ['c', 'a', 'a', 'b'], the expected expressions are ['b', 'c', 'a', 'a'],
+///
+/// This method will return a Vec [3, 0, 1, 2]
+fn expected_expr_positions(
+    current: &[Arc<dyn PhysicalExpr>],
+    expected: &[Arc<dyn PhysicalExpr>],
+) -> Option<Vec<usize>> {
+    let mut indexes: Vec<usize> = vec![];
+    let mut current = current.to_vec();
+    for expr in expected.iter() {
+        // Find the expected expr in `current`; the hit is then overwritten with a NoOp (presumably so duplicates map to distinct slots)
+        if let Some(expected_position) = current.iter().position(|e| e.eq(expr)) {
+            current[expected_position] = Arc::new(NoOp::new());
+            indexes.push(expected_position);
+        } else {
+            return None;
+        }
+    }
+    Some(indexes)
+}
+/// Split the `(left, right)` column pairs of a join condition into two parallel lists of physical expressions.
+fn extract_join_keys(on: &[(Column, Column)]) -> JoinKeyPairs {
+    let (left_keys, right_keys) = on
+        .iter()
+        .map(|(l, r)| {
+            (
+                Arc::new(l.clone()) as Arc<dyn PhysicalExpr>,
+                Arc::new(r.clone()) as Arc<dyn PhysicalExpr>,
+            )
+        })
+        .unzip();
+    JoinKeyPairs {
+        left_keys,
+        right_keys,
+    }
+}
+/// Zip the reordered keys back into `(Column, Column)` join conditions; panics if any key is not a `Column`.
+fn new_join_conditions(
+    new_left_keys: &[Arc<dyn PhysicalExpr>],
+    new_right_keys: &[Arc<dyn PhysicalExpr>],
+) -> Vec<(Column, Column)> {
+    let new_join_on = new_left_keys
+        .iter()
+        .zip(new_right_keys.iter())
+        .map(|(l_key, r_key)| {
+            (
+                l_key.as_any().downcast_ref::<Column>().unwrap().clone(),
+                r_key.as_any().downcast_ref::<Column>().unwrap().clone(),
+            )
+        })
+        .collect::<Vec<_>>();
+    new_join_on
+}
+
 fn ensure_distribution_and_ordering(
     plan: Arc<dyn crate::physical_plan::ExecutionPlan>,
     target_partitions: usize,
@@ -199,6 +816,12 @@ fn ordering_satisfy<F: FnOnce() -> Vec<Vec<Column>>>(
     }
 }
 
+#[derive(Debug, Clone)]
+struct JoinKeyPairs {
+    left_keys: Vec<Arc<dyn PhysicalExpr>>,
+    right_keys: Vec<Arc<dyn PhysicalExpr>>,
+}
+
 #[cfg(test)]
 mod tests {
     use arrow::compute::SortOptions;
@@ -208,6 +831,7 @@ mod tests {
     use datafusion_physical_expr::{expressions, PhysicalExpr};
 
     use super::*;
+    use crate::config::ConfigOptions;
     use crate::datasource::listing::PartitionedFile;
     use crate::datasource::object_store::ObjectStoreUrl;
     use crate::physical_plan::aggregates::{
@@ -374,9 +998,16 @@ mod tests {
     }
 
     #[test]
-    fn muti_hash_joins() -> Result<()> {
+    fn multi_hash_joins() -> Result<()> {
         let left = parquet_exec();
-        let right = parquet_exec();
+        let alias_pairs: Vec<(String, String)> = vec![
+            ("a".to_string(), "a1".to_string()),
+            ("b".to_string(), "b1".to_string()),
+            ("c".to_string(), "c1".to_string()),
+            ("d".to_string(), "d1".to_string()),
+            ("e".to_string(), "e1".to_string()),
+        ];
+        let right = projection_exec_with_alias(parquet_exec(), alias_pairs);
         let join_types = vec![
             JoinType::Inner,
             JoinType::Left,
@@ -386,27 +1017,27 @@ mod tests {
             JoinType::Anti,
         ];
 
-        // Join on (a == b)
+        // Join on (a == b1)
         let join_on = vec![(
             Column::new_with_schema("a", &schema()).unwrap(),
-            Column::new_with_schema("b", &schema()).unwrap(),
-        )];
-
-        // Join on (a == c)
-        let top_join_on = vec![(
-            Column::new_with_schema("a", &schema()).unwrap(),
-            Column::new_with_schema("c", &schema()).unwrap(),
+            Column::new_with_schema("b1", &right.schema()).unwrap(),
         )];
 
         for join_type in join_types {
             let join = hash_join_exec(left.clone(), right.clone(), &join_on, &join_type);
+            // Join on (a == c)
+            let top_join_on = vec![(
+                Column::new_with_schema("a", &join.schema()).unwrap(),
+                Column::new_with_schema("c", &schema()).unwrap(),
+            )];
+
             let top_join =
-                hash_join_exec(join.clone(), right.clone(), &top_join_on, &join_type);
+                hash_join_exec(join.clone(), parquet_exec(), &top_join_on, &join_type);
 
             let top_join_plan =
                 format!("HashJoinExec: mode=Partitioned, join_type={}, on=[(Column {{ name: \"a\", index: 0 }}, Column {{ name: \"c\", index: 2 }})]", join_type);
             let join_plan =
-                format!("HashJoinExec: mode=Partitioned, join_type={}, on=[(Column {{ name: \"a\", index: 0 }}, Column {{ name: \"b\", index: 1 }})]", join_type);
+                format!("HashJoinExec: mode=Partitioned, join_type={}, on=[(Column {{ name: \"a\", index: 0 }}, Column {{ name: \"b1\", index: 1 }})]", join_type);
 
             let expected = match join_type {
                 // Should include 3 RepartitionExecs
@@ -415,7 +1046,8 @@ mod tests {
                     join_plan.as_str(),
                     "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
                     "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
-                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"b1\", index: 1 }], 10)",
+                    "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]",
                     "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
                     "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
                     "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
@@ -427,7 +1059,8 @@ mod tests {
                     join_plan.as_str(),
                     "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
                     "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
-                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"b1\", index: 1 }], 10)",
+                    "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]",
                     "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
                     "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
                     "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
@@ -435,50 +1068,58 @@ mod tests {
             };
             assert_optimized!(expected, top_join);
 
-            // This time we use (b == c) for top join
-            // Join on (b == c)
-            let top_join_on = vec![(
-                Column::new_with_schema("b", &schema()).unwrap(),
-                Column::new_with_schema("c", &schema()).unwrap(),
-            )];
-
-            let top_join = hash_join_exec(join, right.clone(), &top_join_on, &join_type);
-            let top_join_plan =
-                format!("HashJoinExec: mode=Partitioned, join_type={}, on=[(Column {{ name: \"b\", index: 1 }}, Column {{ name: \"c\", index: 2 }})]", join_type);
-
-            let expected = match join_type {
-                // Should include 3 RepartitionExecs
-                JoinType::Inner | JoinType::Right => vec![
-                    top_join_plan.as_str(),
-                    join_plan.as_str(),
-                    "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
-                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
-                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
-                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
-                    "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
-                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
-                ],
-                // Should include 4 RepartitionExecs
-                _ => vec![
-                    top_join_plan.as_str(),
-                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
-                    join_plan.as_str(),
-                    "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
-                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
-                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
-                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
-                    "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
-                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
-                ],
-            };
-            assert_optimized!(expected, top_join);
+            match join_type {
+                JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => {
+                    // This time we use (b1 == c) for top join
+                    // Join on (b1 == c)
+                    let top_join_on = vec![(
+                        Column::new_with_schema("b1", &join.schema()).unwrap(),
+                        Column::new_with_schema("c", &schema()).unwrap(),
+                    )];
+
+                    let top_join =
+                        hash_join_exec(join, parquet_exec(), &top_join_on, &join_type);
+                    let top_join_plan =
+                        format!("HashJoinExec: mode=Partitioned, join_type={}, on=[(Column {{ name: \"b1\", index: 6 }}, Column {{ name: \"c\", index: 2 }})]", join_type);
+
+                    let expected = match join_type {
+                        // Should include 3 RepartitionExecs
+                        JoinType::Inner | JoinType::Right => vec![
+                            top_join_plan.as_str(),
+                            join_plan.as_str(),
+                            "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
+                            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                            "RepartitionExec: partitioning=Hash([Column { name: \"b1\", index: 1 }], 10)",
+                            "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]",
+                            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                            "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
+                            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                        ],
+                        // Should include 4 RepartitionExecs
+                        _ => vec![
+                            top_join_plan.as_str(),
+                            "RepartitionExec: partitioning=Hash([Column { name: \"b1\", index: 6 }], 10)",
+                            join_plan.as_str(),
+                            "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
+                            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                            "RepartitionExec: partitioning=Hash([Column { name: \"b1\", index: 1 }], 10)",
+                            "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]",
+                            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                            "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
+                            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                        ],
+                    };
+                    assert_optimized!(expected, top_join);
+                }
+                _ => {}
+            }
         }
 
         Ok(())
     }
 
     #[test]
-    fn muti_joins_after_alias() -> Result<()> {
+    fn multi_joins_after_alias() -> Result<()> {
         let left = parquet_exec();
         let right = parquet_exec();
 
@@ -489,7 +1130,7 @@ mod tests {
         )];
         let join = hash_join_exec(left, right.clone(), &join_on, &JoinType::Inner);
 
-        // Projection(as as a1, a as a2)
+        // Projection(a as a1, a as a2)
         let alias_pairs: Vec<(String, String)> = vec![
             ("a".to_string(), "a1".to_string()),
             ("a".to_string(), "a2".to_string()),
@@ -549,7 +1190,7 @@ mod tests {
     }
 
     #[test]
-    fn muti_joins_after_multi_alias() -> Result<()> {
+    fn multi_joins_after_multi_alias() -> Result<()> {
         let left = parquet_exec();
         let right = parquet_exec();
 
@@ -670,9 +1311,10 @@ mod tests {
         // Only two RepartitionExecs added
         let expected = &[
             "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: \"b1\", index: 1 }, Column { name: \"b\", index: 0 }), (Column { name: \"a1\", index: 0 }, Column { name: \"a\", index: 1 })]",
-            "AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1, b1@1 as b1], aggr=[]",
-            "RepartitionExec: partitioning=Hash([Column { name: \"a1\", index: 0 }, Column { name: \"b1\", index: 1 }], 10)",
-            "AggregateExec: mode=Partial, gby=[a@0 as a1, b@1 as b1], aggr=[]",
+            "ProjectionExec: expr=[a1@1 as a1, b1@0 as b1]",
+            "AggregateExec: mode=FinalPartitioned, gby=[b1@1 as b1, a1@0 as a1], aggr=[]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"b1\", index: 0 }, Column { name: \"a1\", index: 1 }], 10)",
+            "AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[]",
             "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
             "AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[]",
             "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 0 }, Column { name: \"a\", index: 1 }], 10)",
@@ -684,9 +1326,115 @@ mod tests {
     }
 
     #[test]
-    fn muti_smj_joins() -> Result<()> {
+    fn multi_hash_join_key_ordering() -> Result<()> {
         let left = parquet_exec();
-        let right = parquet_exec();
+        let alias_pairs: Vec<(String, String)> = vec![
+            ("a".to_string(), "a1".to_string()),
+            ("b".to_string(), "b1".to_string()),
+            ("c".to_string(), "c1".to_string()),
+        ];
+        let right = projection_exec_with_alias(parquet_exec(), alias_pairs);
+
+        // Join on (a == a1 and b == b1 and c == c1)
+        let join_on = vec![
+            (
+                Column::new_with_schema("a", &schema()).unwrap(),
+                Column::new_with_schema("a1", &right.schema()).unwrap(),
+            ),
+            (
+                Column::new_with_schema("b", &schema()).unwrap(),
+                Column::new_with_schema("b1", &right.schema()).unwrap(),
+            ),
+            (
+                Column::new_with_schema("c", &schema()).unwrap(),
+                Column::new_with_schema("c1", &right.schema()).unwrap(),
+            ),
+        ];
+        let top_left_join =
+            hash_join_exec(left.clone(), right.clone(), &join_on, &JoinType::Inner);
+
+        // Projection(a as A, a as AA, b as B, c as C)
+        let alias_pairs: Vec<(String, String)> = vec![
+            ("a".to_string(), "A".to_string()),
+            ("a".to_string(), "AA".to_string()),
+            ("b".to_string(), "B".to_string()),
+            ("c".to_string(), "C".to_string()),
+        ];
+        let projection = projection_exec_with_alias(top_left_join, alias_pairs);
+
+        // Join on (c == c1 and b == b1 and a == a1)
+        let join_on = vec![
+            (
+                Column::new_with_schema("c", &schema()).unwrap(),
+                Column::new_with_schema("c1", &right.schema()).unwrap(),
+            ),
+            (
+                Column::new_with_schema("b", &schema()).unwrap(),
+                Column::new_with_schema("b1", &right.schema()).unwrap(),
+            ),
+            (
+                Column::new_with_schema("a", &schema()).unwrap(),
+                Column::new_with_schema("a1", &right.schema()).unwrap(),
+            ),
+        ];
+        let top_right_join =
+            hash_join_exec(left, right.clone(), &join_on, &JoinType::Inner);
+
+        // Join on (B == b1 and C == c and AA == a1)
+        let top_join_on = vec![
+            (
+                Column::new_with_schema("B", &projection.schema()).unwrap(),
+                Column::new_with_schema("b1", &top_right_join.schema()).unwrap(),
+            ),
+            (
+                Column::new_with_schema("C", &projection.schema()).unwrap(),
+                Column::new_with_schema("c", &top_right_join.schema()).unwrap(),
+            ),
+            (
+                Column::new_with_schema("AA", &projection.schema()).unwrap(),
+                Column::new_with_schema("a1", &top_right_join.schema()).unwrap(),
+            ),
+        ];
+
+        let top_join = hash_join_exec(
+            projection.clone(),
+            top_right_join,
+            &top_join_on,
+            &JoinType::Inner,
+        );
+
+        // Output partitioning needs to respect the alias and should not introduce an additional RepartitionExec
+        let expected = &[
+            "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: \"B\", index: 2 }, Column { name: \"b1\", index: 6 }), (Column { name: \"C\", index: 3 }, Column { name: \"c\", index: 2 }), (Column { name: \"AA\", index: 1 }, Column { name: \"a1\", index: 5 })]",
+            "ProjectionExec: expr=[a@0 as A, a@0 as AA, b@1 as B, c@2 as C]",
+            "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: \"b\", index: 1 }, Column { name: \"b1\", index: 1 }), (Column { name: \"c\", index: 2 }, Column { name: \"c1\", index: 2 }), (Column { name: \"a\", index: 0 }, Column { name: \"a1\", index: 0 })]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }, Column { name: \"c\", index: 2 }, Column { name: \"a\", index: 0 }], 10)",
+            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"b1\", index: 1 }, Column { name: \"c1\", index: 2 }, Column { name: \"a1\", index: 0 }], 10)",
+            "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1]",
+            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+            "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: \"b\", index: 1 }, Column { name: \"b1\", index: 1 }), (Column { name: \"c\", index: 2 }, Column { name: \"c1\", index: 2 }), (Column { name: \"a\", index: 0 }, Column { name: \"a1\", index: 0 })]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }, Column { name: \"c\", index: 2 }, Column { name: \"a\", index: 0 }], 10)",
+            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"b1\", index: 1 }, Column { name: \"c1\", index: 2 }, Column { name: \"a1\", index: 0 }], 10)",
+            "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1]",
+            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+        ];
+        assert_optimized!(expected, top_join);
+        Ok(())
+    }
+
+    #[test]
+    fn multi_smj_joins() -> Result<()> {
+        let left = parquet_exec();
+        let alias_pairs: Vec<(String, String)> = vec![
+            ("a".to_string(), "a1".to_string()),
+            ("b".to_string(), "b1".to_string()),
+            ("c".to_string(), "c1".to_string()),
+            ("d".to_string(), "d1".to_string()),
+            ("e".to_string(), "e1".to_string()),
+        ];
+        let right = projection_exec_with_alias(parquet_exec(), alias_pairs);
         let join_types = vec![
             JoinType::Inner,
             JoinType::Left,
@@ -696,24 +1444,25 @@ mod tests {
             JoinType::Anti,
         ];
 
-        // Join on (a == b)
+        // Join on (a == b1)
         let join_on = vec![(
             Column::new_with_schema("a", &schema()).unwrap(),
-            Column::new_with_schema("b", &schema()).unwrap(),
-        )];
-
-        // Join on (a == c)
-        let top_join_on = vec![(
-            Column::new_with_schema("a", &schema()).unwrap(),
-            Column::new_with_schema("c", &schema()).unwrap(),
+            Column::new_with_schema("b1", &right.schema()).unwrap(),
         )];
 
         for join_type in join_types {
             let join =
                 sort_merge_join_exec(left.clone(), right.clone(), &join_on, &join_type);
+
+            // Top join on (a == c)
+            let top_join_on = vec![(
+                Column::new_with_schema("a", &join.schema()).unwrap(),
+                Column::new_with_schema("c", &schema()).unwrap(),
+            )];
+
             let top_join = sort_merge_join_exec(
                 join.clone(),
-                right.clone(),
+                parquet_exec(),
                 &top_join_on,
                 &join_type,
             );
@@ -721,7 +1470,7 @@ mod tests {
             let top_join_plan =
                 format!("SortMergeJoin: join_type={}, on=[(Column {{ name: \"a\", index: 0 }}, Column {{ name: \"c\", index: 2 }})]", join_type);
             let join_plan =
-                format!("SortMergeJoin: join_type={}, on=[(Column {{ name: \"a\", index: 0 }}, Column {{ name: \"b\", index: 1 }})]", join_type);
+                format!("SortMergeJoin: join_type={}, on=[(Column {{ name: \"a\", index: 0 }}, Column {{ name: \"b1\", index: 1 }})]", join_type);
 
             let expected = match join_type {
                 // Should include 3 RepartitionExecs 3 SortExecs
@@ -731,8 +1480,9 @@ mod tests {
                     "SortExec: [a@0 ASC]",
                     "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
                     "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
-                    "SortExec: [b@1 ASC]",
-                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
+                    "SortExec: [b1@1 ASC]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"b1\", index: 1 }], 10)",
+                    "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]",
                     "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
                     "SortExec: [c@2 ASC]",
                     "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
@@ -747,8 +1497,9 @@ mod tests {
                     "SortExec: [a@0 ASC]",
                     "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
                     "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
-                    "SortExec: [b@1 ASC]",
-                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
+                    "SortExec: [b1@1 ASC]",
+                    "RepartitionExec: partitioning=Hash([Column { name: \"b1\", index: 1 }], 10)",
+                    "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]",
                     "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
                     "SortExec: [c@2 ASC]",
                     "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
@@ -757,51 +1508,61 @@ mod tests {
             };
             assert_optimized!(expected, top_join);
 
-            // This time we use (b == c) for top join
-            // Join on (b == c)
-            let top_join_on = vec![(
-                Column::new_with_schema("b", &schema()).unwrap(),
-                Column::new_with_schema("c", &schema()).unwrap(),
-            )];
-
-            let top_join =
-                sort_merge_join_exec(join, right.clone(), &top_join_on, &join_type);
-            let top_join_plan =
-                format!("SortMergeJoin: join_type={}, on=[(Column {{ name: \"b\", index: 1 }}, Column {{ name: \"c\", index: 2 }})]", join_type);
-
-            let expected = match join_type {
-                // Should include 3 RepartitionExecs and 3 SortExecs
-                JoinType::Inner | JoinType::Right => vec![
-                    top_join_plan.as_str(),
-                    join_plan.as_str(),
-                    "SortExec: [a@0 ASC]",
-                    "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
-                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
-                    "SortExec: [b@1 ASC]",
-                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
-                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
-                    "SortExec: [c@2 ASC]",
-                    "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
-                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
-                ],
-                // Should include 4 RepartitionExecs and 4 SortExecs
-                _ => vec![
-                    top_join_plan.as_str(),
-                    "SortExec: [b@1 ASC]",
-                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
-                    join_plan.as_str(),
-                    "SortExec: [a@0 ASC]",
-                    "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
-                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
-                    "SortExec: [b@1 ASC]",
-                    "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 1 }], 10)",
-                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
-                    "SortExec: [c@2 ASC]",
-                    "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
-                    "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
-                ],
-            };
-            assert_optimized!(expected, top_join);
+            match join_type {
+                JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => {
+                    // This time we use (b1 == c) for top join
+                    // Join on (b1 == c)
+                    let top_join_on = vec![(
+                        Column::new_with_schema("b1", &join.schema()).unwrap(),
+                        Column::new_with_schema("c", &schema()).unwrap(),
+                    )];
+                    let top_join = sort_merge_join_exec(
+                        join,
+                        parquet_exec(),
+                        &top_join_on,
+                        &join_type,
+                    );
+                    let top_join_plan =
+                        format!("SortMergeJoin: join_type={}, on=[(Column {{ name: \"b1\", index: 6 }}, Column {{ name: \"c\", index: 2 }})]", join_type);
+
+                    let expected = match join_type {
+                        // Should include 3 RepartitionExecs and 3 SortExecs
+                        JoinType::Inner | JoinType::Right => vec![
+                            top_join_plan.as_str(),
+                            join_plan.as_str(),
+                            "SortExec: [a@0 ASC]",
+                            "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
+                            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                            "SortExec: [b1@1 ASC]",
+                            "RepartitionExec: partitioning=Hash([Column { name: \"b1\", index: 1 }], 10)",
+                            "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]",
+                            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                            "SortExec: [c@2 ASC]",
+                            "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
+                            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                        ],
+                        // Should include 4 RepartitionExecs and 4 SortExecs
+                        _ => vec![
+                            top_join_plan.as_str(),
+                            "SortExec: [b1@6 ASC]",
+                            "RepartitionExec: partitioning=Hash([Column { name: \"b1\", index: 6 }], 10)",
+                            join_plan.as_str(),
+                            "SortExec: [a@0 ASC]",
+                            "RepartitionExec: partitioning=Hash([Column { name: \"a\", index: 0 }], 10)",
+                            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                            "SortExec: [b1@1 ASC]",
+                            "RepartitionExec: partitioning=Hash([Column { name: \"b1\", index: 1 }], 10)",
+                            "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]",
+                            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                            "SortExec: [c@2 ASC]",
+                            "RepartitionExec: partitioning=Hash([Column { name: \"c\", index: 2 }], 10)",
+                            "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
+                        ],
+                    };
+                    assert_optimized!(expected, top_join);
+                }
+                _ => {}
+            }
         }
 
         Ok(())
@@ -817,6 +1578,13 @@ mod tests {
                 ("b".to_string(), "b1".to_string()),
             ],
         );
+        // Projection(a1 as a3, b1 as b3)
+        let alias_pairs: Vec<(String, String)> = vec![
+            ("a1".to_string(), "a3".to_string()),
+            ("b1".to_string(), "b3".to_string()),
+        ];
+        let left = projection_exec_with_alias(left, alias_pairs);
+
         // group by (b, a)
         let right = aggregate_exec_with_alias(
             parquet_exec(),
@@ -826,28 +1594,38 @@ mod tests {
             ],
         );
 
-        // Join on (b1 == b && a1 == a)
+        // Projection(a as a2, b as b2)
+        let alias_pairs: Vec<(String, String)> = vec![
+            ("a".to_string(), "a2".to_string()),
+            ("b".to_string(), "b2".to_string()),
+        ];
+        let right = projection_exec_with_alias(right, alias_pairs);
+
+        // Join on (b3 == b2 && a3 == a2)
         let join_on = vec![
             (
-                Column::new_with_schema("b1", &left.schema()).unwrap(),
-                Column::new_with_schema("b", &right.schema()).unwrap(),
+                Column::new_with_schema("b3", &left.schema()).unwrap(),
+                Column::new_with_schema("b2", &right.schema()).unwrap(),
             ),
             (
-                Column::new_with_schema("a1", &left.schema()).unwrap(),
-                Column::new_with_schema("a", &right.schema()).unwrap(),
+                Column::new_with_schema("a3", &left.schema()).unwrap(),
+                Column::new_with_schema("a2", &right.schema()).unwrap(),
             ),
         ];
         let join = sort_merge_join_exec(left, right.clone(), &join_on, &JoinType::Inner);
 
         // Only two RepartitionExecs added
         let expected = &[
-            "SortMergeJoin: join_type=Inner, on=[(Column { name: \"b1\", index: 1 }, Column { name: \"b\", index: 0 }), (Column { name: \"a1\", index: 0 }, Column { name: \"a\", index: 1 })]",
-            "SortExec: [b1@1 ASC,a1@0 ASC]",
-            "AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1, b1@1 as b1], aggr=[]",
-            "RepartitionExec: partitioning=Hash([Column { name: \"a1\", index: 0 }, Column { name: \"b1\", index: 1 }], 10)",
-            "AggregateExec: mode=Partial, gby=[a@0 as a1, b@1 as b1], aggr=[]",
+            "SortMergeJoin: join_type=Inner, on=[(Column { name: \"b3\", index: 1 }, Column { name: \"b2\", index: 1 }), (Column { name: \"a3\", index: 0 }, Column { name: \"a2\", index: 0 })]",
+            "SortExec: [b3@1 ASC,a3@0 ASC]",
+            "ProjectionExec: expr=[a1@0 as a3, b1@1 as b3]",
+            "ProjectionExec: expr=[a1@1 as a1, b1@0 as b1]",
+            "AggregateExec: mode=FinalPartitioned, gby=[b1@1 as b1, a1@0 as a1], aggr=[]",
+            "RepartitionExec: partitioning=Hash([Column { name: \"b1\", index: 0 }, Column { name: \"a1\", index: 1 }], 10)",
+            "AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[]",
             "ParquetExec: limit=None, partitions=[x], projection=[a, b, c, d, e]",
-            "SortExec: [b@0 ASC,a@1 ASC]",
+            "SortExec: [b2@1 ASC,a2@0 ASC]",
+            "ProjectionExec: expr=[a@1 as a2, b@0 as b2]",
             "AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[]",
             "RepartitionExec: partitioning=Hash([Column { name: \"b\", index: 0 }, Column { name: \"a\", index: 1 }], 10)",
             "AggregateExec: mode=Partial, gby=[b@1 as b, a@0 as a], aggr=[]",
diff --git a/datafusion/core/src/physical_plan/aggregates/mod.rs b/datafusion/core/src/physical_plan/aggregates/mod.rs
index c1fa46229ff0c..90305d7556adc 100644
--- a/datafusion/core/src/physical_plan/aggregates/mod.rs
+++ b/datafusion/core/src/physical_plan/aggregates/mod.rs
@@ -153,19 +153,19 @@ impl PhysicalGroupBy {
 #[derive(Debug)]
 pub struct AggregateExec {
     /// Aggregation mode (full, partial)
-    mode: AggregateMode,
+    pub mode: AggregateMode,
     /// Group by expressions
-    group_by: PhysicalGroupBy,
+    pub group_by: PhysicalGroupBy,
     /// Aggregate expressions
-    aggr_expr: Vec<Arc<dyn AggregateExpr>>,
+    pub aggr_expr: Vec<Arc<dyn AggregateExpr>>,
     /// Input plan, could be a partial aggregate or the input to the aggregate
-    input: Arc<dyn ExecutionPlan>,
+    pub input: Arc<dyn ExecutionPlan>,
     /// Schema after the aggregate is applied
     schema: SchemaRef,
     /// Input schema before any aggregation is applied. For partial aggregate this will be the
     /// same as input.schema() but for the final aggregate it will be the same as the input
     /// to the partial aggregate
-    input_schema: SchemaRef,
+    pub input_schema: SchemaRef,
     /// The alias map used to normalize out expressions like Partitioning
     alias_map: HashMap<Column, Vec<Column>>,
     /// Execution Metrics
diff --git a/datafusion/core/src/physical_plan/hash_join.rs b/datafusion/core/src/physical_plan/hash_join.rs
index 47b0d357125e5..6c3527faa05f0 100644
--- a/datafusion/core/src/physical_plan/hash_join.rs
+++ b/datafusion/core/src/physical_plan/hash_join.rs
@@ -83,6 +83,7 @@ use crate::physical_plan::{Distribution, PhysicalExpr};
 
 use crate::physical_plan::join_utils::{OnceAsync, OnceFut};
 use datafusion_physical_expr::combine_equivalence_properties;
+use datafusion_physical_expr::TreeNodeRewritable;
 use log::debug;
 use std::cmp;
 use std::fmt;
@@ -119,15 +120,15 @@ type JoinLeftData = (JoinHashMap, RecordBatch);
 #[derive(Debug)]
 pub struct HashJoinExec {
     /// left (build) side which gets hashed
-    left: Arc<dyn ExecutionPlan>,
+    pub left: Arc<dyn ExecutionPlan>,
     /// right (probe) side which are filtered by the hash table
-    right: Arc<dyn ExecutionPlan>,
+    pub right: Arc<dyn ExecutionPlan>,
     /// Set of common columns used to join on
-    on: Vec<(Column, Column)>,
+    pub on: Vec<(Column, Column)>,
     /// Filters which are applied while finding matching rows
-    filter: Option<JoinFilter>,
+    pub filter: Option<JoinFilter>,
     /// How the join is performed
-    join_type: JoinType,
+    pub join_type: JoinType,
     /// The schema once the join is applied
     schema: SchemaRef,
     /// Build-side data
@@ -135,13 +136,13 @@ pub struct HashJoinExec {
     /// Shares the `RandomState` for the hashing algorithm
     random_state: RandomState,
     /// Partitioning mode to use
-    mode: PartitionMode,
+    pub mode: PartitionMode,
     /// Execution metrics
     metrics: ExecutionPlanMetricsSet,
     /// Information of index and left / right placement of columns
     column_indices: Vec<ColumnIndex>,
     /// If null_equals_null is true, null == null else null != null
-    null_equals_null: bool,
+    pub null_equals_null: bool,
 }
 
 /// Metrics for HashJoinExec
@@ -329,7 +330,36 @@ impl ExecutionPlan for HashJoinExec {
         match self.join_type {
             JoinType::Inner => self.left.output_partitioning(),
             JoinType::Left => self.left.output_partitioning(),
-            JoinType::Right => self.right.output_partitioning(),
+            JoinType::Right => {
+                let left_columns_len = self.left.schema().fields.len();
+                match self.right.output_partitioning() {
+                    Partitioning::RoundRobinBatch(size) => {
+                        Partitioning::RoundRobinBatch(size)
+                    }
+                    Partitioning::Hash(exprs, size) => {
+                        let new_exprs = exprs
+                            .into_iter()
+                            .map(|expr| {
+                                expr.transform_down(&|e| match e
+                                    .as_any()
+                                    .downcast_ref::<Column>()
+                                {
+                                    Some(col) => Some(Arc::new(Column::new(
+                                        col.name(),
+                                        left_columns_len + col.index(),
+                                    ))),
+                                    None => None,
+                                })
+                                .unwrap()
+                            })
+                            .collect::<Vec<_>>();
+                        Partitioning::Hash(new_exprs, size)
+                    }
+                    Partitioning::UnknownPartitioning(size) => {
+                        Partitioning::UnknownPartitioning(size)
+                    }
+                }
+            }
             _ => Partitioning::UnknownPartitioning(
                 self.right.output_partitioning().partition_count(),
             ),
@@ -344,12 +374,34 @@ impl ExecutionPlan for HashJoinExec {
 
     fn equivalence_properties(&self) -> Vec<Vec<Column>> {
         let mut left_properties = self.left.equivalence_properties();
-        let right_properties = self.right.equivalence_properties();
-        left_properties.extend(right_properties);
+        match self.join_type {
+            JoinType::Inner | JoinType::Left | JoinType::Full | JoinType::Right => {
+                let right_properties = self.right.equivalence_properties();
+                let left_columns_len = self.left.schema().fields.len();
+                let new_right_properties = right_properties
+                    .into_iter()
+                    .map(|cols| {
+                        cols.into_iter()
+                            .map(|col| {
+                                Column::new(col.name(), left_columns_len + col.index())
+                            })
+                            .collect::<Vec<_>>()
+                    })
+                    .collect::<Vec<_>>();
+                left_properties.extend(new_right_properties);
+            }
+            JoinType::Semi | JoinType::Anti => {}
+        }
 
         if self.join_type == JoinType::Inner {
+            let left_columns_len = self.left.schema().fields.len();
             self.on.iter().for_each(|(column1, column2)| {
-                combine_equivalence_properties(&mut left_properties, (column1, column2))
+                let new_column2 =
+                    Column::new(column2.name(), left_columns_len + column2.index());
+                combine_equivalence_properties(
+                    &mut left_properties,
+                    (column1, &new_column2),
+                )
             })
         }
         left_properties
diff --git a/datafusion/core/src/physical_plan/mod.rs b/datafusion/core/src/physical_plan/mod.rs
index 6505c94914fc4..79988dda6b38b 100644
--- a/datafusion/core/src/physical_plan/mod.rs
+++ b/datafusion/core/src/physical_plan/mod.rs
@@ -541,7 +541,7 @@ pub trait TreeNodeRewritable: Clone {
         };
 
         let after_op_children =
-            self.map_children(|node| node.transform_using(rewriter))?;
+            self.map_children_mut(|node| node.transform_using(rewriter))?;
 
         // now rewrite this node itself
         if need_mutate {
@@ -595,6 +595,11 @@ pub trait TreeNodeRewritable: Clone {
 
     /// Apply transform `F` to the node's children, the transform `F` might have a direction(Preorder or Postorder)
     fn map_children<F>(self, transform: F) -> Result<Self>
+    where
+        F: Fn(Self) -> Result<Self>;
+
+    /// Apply transform `F` to the node's children like `map_children`, but accept an `FnMut` so `F` may mutate the state it captures; `F` might have a direction (Preorder or Postorder)
+    fn map_children_mut<F>(self, transform: F) -> Result<Self>
     where
         F: FnMut(Self) -> Result<Self>;
 }
@@ -628,6 +633,19 @@ pub enum RewriteRecursion {
 
 impl TreeNodeRewritable for Arc<dyn ExecutionPlan> {
     fn map_children<F>(self, transform: F) -> Result<Self>
+    where
+        F: Fn(Self) -> Result<Self>,
+    {
+        if !self.children().is_empty() {
+            let new_children: Result<Vec<_>> =
+                self.children().into_iter().map(transform).collect();
+            with_new_children_if_necessary(self, new_children?)
+        } else {
+            Ok(self)
+        }
+    }
+
+    fn map_children_mut<F>(self, transform: F) -> Result<Self>
     where
         F: FnMut(Self) -> Result<Self>,
     {
@@ -732,7 +750,7 @@ impl Partitioning {
                     // then we need to have the partition count and hash functions validation.
                     Partitioning::Hash(partition_exprs, _) => {
                         let fast_match =
-                            expr_list_eq_any_order(&required_exprs, partition_exprs);
+                            expr_list_eq_strict_order(&required_exprs, partition_exprs);
                         // If the required exprs do not match, need to leverage the eq_properties provided by the child
                         // and normalize both exprs based on the eq_properties
                         if !fast_match {
@@ -756,7 +774,7 @@ impl Partitioning {
                                         )
                                     })
                                     .collect::<Vec<_>>();
-                                expr_list_eq_any_order(
+                                expr_list_eq_strict_order(
                                     &normalized_required_exprs,
                                     &normalized_partition_exprs,
                                 )
@@ -783,7 +801,7 @@ impl PartialEq for Partitioning {
                 Partitioning::RoundRobinBatch(count2),
             ) if count1 == count2 => true,
             (Partitioning::Hash(exprs1, count1), Partitioning::Hash(exprs2, count2))
-                if expr_list_eq_any_order(exprs1, exprs2) && (count1 == count2) =>
+                if expr_list_eq_strict_order(exprs1, exprs2) && (count1 == count2) =>
             {
                 true
             }
@@ -893,6 +911,6 @@ pub mod windows;
 use crate::execution::context::TaskContext;
 use datafusion_physical_expr::expressions::Column;
 use datafusion_physical_expr::{
-    expr_list_eq_any_order, normalize_expr_with_equivalence_properties,
+    expr_list_eq_strict_order, normalize_expr_with_equivalence_properties,
 };
 pub use datafusion_physical_expr::{expressions, functions, type_coercion, udf};
diff --git a/datafusion/core/src/physical_plan/projection.rs b/datafusion/core/src/physical_plan/projection.rs
index 96afd26517d23..aba68ea8ac1ec 100644
--- a/datafusion/core/src/physical_plan/projection.rs
+++ b/datafusion/core/src/physical_plan/projection.rs
@@ -50,13 +50,13 @@ use futures::stream::StreamExt;
 #[derive(Debug)]
 pub struct ProjectionExec {
     /// The projection expressions stored as tuples of (expression, output column name)
-    expr: Vec<(Arc<dyn PhysicalExpr>, String)>,
+    pub expr: Vec<(Arc<dyn PhysicalExpr>, String)>,
     /// The alias map used to normalize out expressions like Partitioning and PhysicalSortExpr
-    alias_map: HashMap<Column, Vec<Column>>,
+    pub alias_map: HashMap<Column, Vec<Column>>,
     /// The schema once the projection has been applied to the input
-    schema: SchemaRef,
+    pub schema: SchemaRef,
     /// The input plan
-    input: Arc<dyn ExecutionPlan>,
+    pub input: Arc<dyn ExecutionPlan>,
     /// Execution metrics
     metrics: ExecutionPlanMetricsSet,
 }
diff --git a/datafusion/core/src/physical_plan/sort_merge_join.rs b/datafusion/core/src/physical_plan/sort_merge_join.rs
index d16e1a61ea793..eddfdaeaf22ca 100644
--- a/datafusion/core/src/physical_plan/sort_merge_join.rs
+++ b/datafusion/core/src/physical_plan/sort_merge_join.rs
@@ -33,7 +33,9 @@ use arrow::compute::{take, SortOptions};
 use arrow::datatypes::{DataType, SchemaRef, TimeUnit};
 use arrow::error::{ArrowError, Result as ArrowResult};
 use arrow::record_batch::RecordBatch;
-use datafusion_physical_expr::{combine_equivalence_properties, PhysicalExpr};
+use datafusion_physical_expr::{
+    combine_equivalence_properties, PhysicalExpr, TreeNodeRewritable,
+};
 use futures::{Stream, StreamExt};
 
 use crate::error::DataFusionError;
@@ -55,13 +57,13 @@ use crate::physical_plan::{
 #[derive(Debug)]
 pub struct SortMergeJoinExec {
     /// Left sorted joining execution plan
-    left: Arc<dyn ExecutionPlan>,
+    pub left: Arc<dyn ExecutionPlan>,
     /// Right sorting joining execution plan
-    right: Arc<dyn ExecutionPlan>,
+    pub right: Arc<dyn ExecutionPlan>,
     /// Set of common columns used to join on
-    on: JoinOn,
+    pub on: JoinOn,
     /// How the join is performed
-    join_type: JoinType,
+    pub join_type: JoinType,
     /// The schema once the join is applied
     schema: SchemaRef,
     /// Execution metrics
@@ -70,10 +72,12 @@ pub struct SortMergeJoinExec {
     left_sort_exprs: Vec<PhysicalSortExpr>,
     /// The right SortExpr
     right_sort_exprs: Vec<PhysicalSortExpr>,
+    /// The output ordering
+    output_ordering: Option<Vec<PhysicalSortExpr>>,
     /// Sort options of join columns used in sorting left and right execution plans
-    sort_options: Vec<SortOptions>,
+    pub sort_options: Vec<SortOptions>,
     /// If null_equals_null is true, null == null else null != null
-    null_equals_null: bool,
+    pub null_equals_null: bool,
 }
 
 impl SortMergeJoinExec {
@@ -129,6 +133,41 @@ impl SortMergeJoinExec {
             })
             .collect::<Vec<_>>();
 
+        let output_ordering = match join_type {
+            JoinType::Inner | JoinType::Left | JoinType::Semi | JoinType::Anti => {
+                left.output_ordering().map(|sort_exprs| sort_exprs.to_vec())
+            }
+            JoinType::Right => {
+                let left_columns_len = left.schema().fields.len();
+                right.output_ordering().map(|sort_exprs| {
+                    sort_exprs
+                        .iter()
+                        .map(|e| {
+                            let new_expr = e
+                                .expr
+                                .clone()
+                                .transform_down(&|e| match e
+                                    .as_any()
+                                    .downcast_ref::<Column>()
+                                {
+                                    Some(col) => Some(Arc::new(Column::new(
+                                        col.name(),
+                                        left_columns_len + col.index(),
+                                    ))),
+                                    None => None,
+                                })
+                                .unwrap();
+                            PhysicalSortExpr {
+                                expr: new_expr,
+                                options: e.options,
+                            }
+                        })
+                        .collect::<Vec<_>>()
+                })
+            }
+            JoinType::Full => None,
+        };
+
         let schema =
             Arc::new(build_join_schema(&left_schema, &right_schema, &join_type).0);
 
@@ -141,6 +180,7 @@ impl SortMergeJoinExec {
             metrics: ExecutionPlanMetricsSet::new(),
             left_sort_exprs,
             right_sort_exprs,
+            output_ordering,
             sort_options,
             null_equals_null,
         })
@@ -160,7 +200,36 @@ impl ExecutionPlan for SortMergeJoinExec {
         match self.join_type {
             JoinType::Inner => self.left.output_partitioning(),
             JoinType::Left => self.left.output_partitioning(),
-            JoinType::Right => self.right.output_partitioning(),
+            JoinType::Right => {
+                let left_columns_len = self.left.schema().fields.len();
+                match self.right.output_partitioning() {
+                    Partitioning::RoundRobinBatch(size) => {
+                        Partitioning::RoundRobinBatch(size)
+                    }
+                    Partitioning::Hash(exprs, size) => {
+                        let new_exprs = exprs
+                            .into_iter()
+                            .map(|expr| {
+                                expr.transform_down(&|e| match e
+                                    .as_any()
+                                    .downcast_ref::<Column>()
+                                {
+                                    Some(col) => Some(Arc::new(Column::new(
+                                        col.name(),
+                                        left_columns_len + col.index(),
+                                    ))),
+                                    None => None,
+                                })
+                                .unwrap()
+                            })
+                            .collect::<Vec<_>>();
+                        Partitioning::Hash(new_exprs, size)
+                    }
+                    Partitioning::UnknownPartitioning(size) => {
+                        Partitioning::UnknownPartitioning(size)
+                    }
+                }
+            }
             _ => Partitioning::UnknownPartitioning(
                 self.right.output_partitioning().partition_count(),
             ),
@@ -168,13 +237,7 @@ impl ExecutionPlan for SortMergeJoinExec {
     }
 
     fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
-        match self.join_type {
-            JoinType::Inner | JoinType::Left | JoinType::Semi | JoinType::Anti => {
-                self.left.output_ordering()
-            }
-            JoinType::Right => self.right.output_ordering(),
-            JoinType::Full => None,
-        }
+        self.output_ordering.as_deref()
     }
 
     fn required_input_distribution(&self) -> Vec<Distribution> {
@@ -200,12 +263,34 @@ impl ExecutionPlan for SortMergeJoinExec {
 
     fn equivalence_properties(&self) -> Vec<Vec<Column>> {
         let mut left_properties = self.left.equivalence_properties();
-        let right_properties = self.right.equivalence_properties();
-        left_properties.extend(right_properties);
+        match self.join_type {
+            JoinType::Inner | JoinType::Left | JoinType::Full | JoinType::Right => {
+                let right_properties = self.right.equivalence_properties();
+                let left_columns_len = self.left.schema().fields.len();
+                let new_right_properties = right_properties
+                    .into_iter()
+                    .map(|cols| {
+                        cols.into_iter()
+                            .map(|col| {
+                                Column::new(col.name(), left_columns_len + col.index())
+                            })
+                            .collect::<Vec<_>>()
+                    })
+                    .collect::<Vec<_>>();
+                left_properties.extend(new_right_properties);
+            }
+            JoinType::Semi | JoinType::Anti => {}
+        }
 
         if self.join_type == JoinType::Inner {
+            let left_columns_len = self.left.schema().fields.len();
             self.on.iter().for_each(|(column1, column2)| {
-                combine_equivalence_properties(&mut left_properties, (column1, column2))
+                let new_column2 =
+                    Column::new(column2.name(), left_columns_len + column2.index());
+                combine_equivalence_properties(
+                    &mut left_properties,
+                    (column1, &new_column2),
+                )
             })
         }
         left_properties
diff --git a/datafusion/core/src/physical_plan/windows/window_agg_exec.rs b/datafusion/core/src/physical_plan/windows/window_agg_exec.rs
index 457ac165f886f..5dfbaee1e5f5f 100644
--- a/datafusion/core/src/physical_plan/windows/window_agg_exec.rs
+++ b/datafusion/core/src/physical_plan/windows/window_agg_exec.rs
@@ -47,17 +47,17 @@ use std::task::{Context, Poll};
 #[derive(Debug)]
 pub struct WindowAggExec {
     /// Input plan
-    input: Arc<dyn ExecutionPlan>,
+    pub input: Arc<dyn ExecutionPlan>,
     /// Window function expression
-    window_expr: Vec<Arc<dyn WindowExpr>>,
+    pub window_expr: Vec<Arc<dyn WindowExpr>>,
     /// Schema after the window is run
     schema: SchemaRef,
     /// Schema before the window
-    input_schema: SchemaRef,
+    pub input_schema: SchemaRef,
     /// Partition Keys
-    partition_keys: Vec<Arc<dyn PhysicalExpr>>,
+    pub partition_keys: Vec<Arc<dyn PhysicalExpr>>,
     /// Sort Keys
-    sort_keys: Option<Vec<PhysicalSortExpr>>,
+    pub sort_keys: Option<Vec<PhysicalSortExpr>>,
     /// Execution metrics
     metrics: ExecutionPlanMetricsSet,
 }
diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs
index 1872de3e1435e..e97482075dc27 100644
--- a/datafusion/physical-expr/src/expressions/mod.rs
+++ b/datafusion/physical-expr/src/expressions/mod.rs
@@ -81,6 +81,7 @@ pub use is_not_null::{is_not_null, IsNotNullExpr};
 pub use is_null::{is_null, IsNullExpr};
 pub use literal::{lit, Literal};
 pub use negative::{negative, NegativeExpr};
+pub use no_op::NoOp;
 pub use not::{not, NotExpr};
 pub use nullif::nullif_func;
 pub use try_cast::{try_cast, TryCastExpr};
diff --git a/datafusion/physical-expr/src/expressions/no_op.rs b/datafusion/physical-expr/src/expressions/no_op.rs
index 2c916ed321c99..b81bcfc30b905 100644
--- a/datafusion/physical-expr/src/expressions/no_op.rs
+++ b/datafusion/physical-expr/src/expressions/no_op.rs
@@ -31,7 +31,7 @@ use datafusion_common::Result;
 use datafusion_expr::ColumnarValue;
 
 /// A place holder expressions, can not be evaluated
-#[derive(Debug, PartialEq, Eq)]
+#[derive(Debug, PartialEq, Eq, Default)]
 pub struct NoOp {}
 
 impl NoOp {
diff --git a/datafusion/physical-expr/src/lib.rs b/datafusion/physical-expr/src/lib.rs
index 0578ae26dcf48..78e5b171140f7 100644
--- a/datafusion/physical-expr/src/lib.rs
+++ b/datafusion/physical-expr/src/lib.rs
@@ -45,6 +45,7 @@ pub use aggregate::AggregateExpr;
 pub use datafusion_common::from_slice;
 pub use physical_expr::combine_equivalence_properties;
 pub use physical_expr::expr_list_eq_any_order;
+pub use physical_expr::expr_list_eq_strict_order;
 pub use physical_expr::merge_equivalence_properties_with_alias;
 pub use physical_expr::normalize_expr_with_equivalence_properties;
 pub use physical_expr::normalize_out_expr_with_alias_schema;
@@ -54,6 +55,7 @@ pub use physical_expr::sort_expr_list_eq_strict_order;
 pub use physical_expr::split_predicate;
 pub use physical_expr::truncate_equivalence_properties_not_in_schema;
 pub use physical_expr::PhysicalExpr;
+pub use physical_expr::TreeNodeRewritable;
 pub use planner::create_physical_expr;
 pub use scalar_function::ScalarFunctionExpr;
 pub use sort_expr::PhysicalSortExpr;
diff --git a/datafusion/physical-expr/src/physical_expr.rs b/datafusion/physical-expr/src/physical_expr.rs
index 0006d926072ce..a203d6aec1058 100644
--- a/datafusion/physical-expr/src/physical_expr.rs
+++ b/datafusion/physical-expr/src/physical_expr.rs
@@ -289,10 +289,6 @@ fn scatter(mask: &BooleanArray, truthy: &dyn Array) -> Result<ArrayRef> {
 /// For example two InListExpr can be considered to be equals no matter the order:
 ///
 /// In('a','b','c') == In('c','b','a')
-///
-/// Another example is for Partition Exprs, we can safely consider the below two exprs are equal:
-///
-/// HashPartitioned('a','b','c') == HashPartitioned('c','b','a')
 pub fn expr_list_eq_any_order(
     list1: &[Arc<dyn PhysicalExpr>],
     list2: &[Arc<dyn PhysicalExpr>],
@@ -313,6 +309,14 @@ pub fn expr_list_eq_any_order(
     }
 }
 
+/// Strictly compare two expr lists: they are equal only if they have the same length and pairwise-equal exprs in the same order.
+pub fn expr_list_eq_strict_order(
+    list1: &[Arc<dyn PhysicalExpr>],
+    list2: &[Arc<dyn PhysicalExpr>],
+) -> bool {
+    list1.len() == list2.len() && list1.iter().zip(list2.iter()).all(|(e1, e2)| e1.eq(e2))
+}
+
 /// Strictly compare the two sort expr lists in the given order.
 ///
 /// For Physical Sort Exprs, the order matters:
@@ -479,7 +483,7 @@ pub fn normalize_out_expr_with_alias_schema(
 
 pub fn normalize_expr_with_equivalence_properties(
     expr: Arc<dyn PhysicalExpr>,
-    eq_properties: &Vec<Vec<Column>>,
+    eq_properties: &[Vec<Column>],
 ) -> Arc<dyn PhysicalExpr> {
     let mut normalized = expr.clone();
     if let Some(column) = expr.as_any().downcast_ref::<Column>() {
@@ -495,7 +499,7 @@ pub fn normalize_expr_with_equivalence_properties(
 
 pub fn normalize_sort_expr_with_equivalence_properties(
     sort_expr: PhysicalSortExpr,
-    eq_properties: &Vec<Vec<Column>>,
+    eq_properties: &[Vec<Column>],
 ) -> PhysicalSortExpr {
     let mut normalized = sort_expr.clone();
     if let Some(column) = sort_expr.expr.as_any().downcast_ref::<Column>() {