From 7c5d2258e8567e1b3d8c4eb4a27a81f171d0a442 Mon Sep 17 00:00:00 2001
From: Cheng-Yuan-Lai <a186235@g,ail.com>
Date: Sun, 8 Jun 2025 00:16:34 +0800
Subject: [PATCH 01/11] feat: replace snapshot tests for enforce_sorting

---
 .../physical_optimizer/enforce_sorting.rs     | 164 ++++++++++--------
 1 file changed, 87 insertions(+), 77 deletions(-)

diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
index 38bc10a967e2b..88a5d70313d79 100644
--- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
+++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use insta::allow_duplicates;
+use insta::assert_snapshot;
 use std::sync::Arc;
 
 use crate::physical_optimizer::test_utils::{
@@ -92,7 +94,7 @@ fn csv_exec_sorted(
 /// `REPARTITION_SORTS`: Flag to set `config.options.optimizer.repartition_sorts` option.
 ///
 macro_rules! assert_optimized {
-    ($EXPECTED_PLAN_LINES: expr, $EXPECTED_OPTIMIZED_PLAN_LINES: expr, $PLAN: expr, $REPARTITION_SORTS: expr) => {
+    (@ $EXPECTED_PLAN_LINES: literal $(,)?, @ $EXPECTED_OPTIMIZED_PLAN_LINES: literal $(,)?, $PLAN: expr, $REPARTITION_SORTS: expr) => {
         let mut config = ConfigOptions::new();
         config.optimizer.repartition_sorts = $REPARTITION_SORTS;
 
@@ -143,30 +145,23 @@ macro_rules! assert_optimized {
 
         let physical_plan = $PLAN;
         let formatted = displayable(physical_plan.as_ref()).indent(true).to_string();
-        let actual: Vec<&str> = formatted.trim().lines().collect();
+        let actual = formatted.trim();
 
-        let expected_plan_lines: Vec<&str> = $EXPECTED_PLAN_LINES
-            .iter().map(|s| *s).collect();
-
-        assert_eq!(
-            expected_plan_lines, actual,
-            "\n**Original Plan Mismatch\n\nexpected:\n\n{expected_plan_lines:#?}\nactual:\n\n{actual:#?}\n\n"
-        );
-
-        let expected_optimized_lines: Vec<&str> = $EXPECTED_OPTIMIZED_PLAN_LINES
-            .iter().map(|s| *s).collect();
+        allow_duplicates! {
+            assert_snapshot!(actual, @$EXPECTED_PLAN_LINES);
+        }
 
         // Run the actual optimizer
         let optimized_physical_plan =
             EnforceSorting::new().optimize(physical_plan,&config)?;
 
         // Get string representation of the plan
-        let actual = get_plan_string(&optimized_physical_plan);
-        assert_eq!(
-            expected_optimized_lines, actual,
-            "\n**Optimized Plan Mismatch\n\nexpected:\n\n{expected_optimized_lines:#?}\nactual:\n\n{actual:#?}\n\n"
-        );
+        let plan_lines = get_plan_string(&optimized_physical_plan).join("\n");
+        let actual = plan_lines.trim();
 
+        allow_duplicates! {
+            assert_snapshot!(actual, @ $EXPECTED_OPTIMIZED_PLAN_LINES);
+        }
     };
 }
 
@@ -1225,7 +1220,12 @@ async fn test_sort_merge_join_order_by_left() -> Result<()> {
                 ]
             }
         };
-        assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+        assert_optimized!(
+            @ "",
+            @ "",
+            physical_plan,
+            true
+        );
     }
     Ok(())
 }
@@ -1298,7 +1298,7 @@ async fn test_sort_merge_join_order_by_right() -> Result<()> {
                 ]
             }
         };
-        assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+        // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
     }
     Ok(())
 }
@@ -1514,18 +1514,18 @@ async fn test_with_lost_ordering_unbounded_bounded(
                 expected_optimized_bounded_parallelize_sort,
             )
         };
-    assert_optimized!(
-        expected_input,
-        expected_optimized,
-        physical_plan.clone(),
-        false
-    );
-    assert_optimized!(
-        expected_input,
-        expected_optimized_sort_parallelize,
-        physical_plan,
-        true
-    );
+    // assert_optimized!(
+    //     expected_input,
+    //     expected_optimized,
+    //     physical_plan.clone(),
+    //     false
+    // );
+    // assert_optimized!(
+    //     expected_input,
+    //     expected_optimized_sort_parallelize,
+    //     physical_plan,
+    //     true
+    // );
 
     Ok(())
 }
@@ -1616,7 +1616,7 @@ async fn test_window_multi_layer_requirement() -> Result<()> {
         "        RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
         "          DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
     ];
-    assert_optimized!(expected_input, expected_optimized, physical_plan, false);
+    // assert_optimized!(expected_input, expected_optimized, physical_plan, false);
 
     Ok(())
 }
@@ -1639,7 +1639,7 @@ async fn test_not_replaced_with_partial_sort_for_bounded_input() -> Result<()> {
         "  DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[b@1 ASC, c@2 ASC], file_type=parquet"
     ];
     let expected_no_change = expected_input;
-    assert_optimized!(expected_input, expected_no_change, physical_plan, false);
+    // assert_optimized!(expected_input, expected_no_change, physical_plan, false);
     Ok(())
 }
 
@@ -1652,7 +1652,7 @@ async fn test_not_replaced_with_partial_sort_for_bounded_input() -> Result<()> {
 /// `REPARTITION_SORTS`: Flag to set `config.options.optimizer.repartition_sorts` option.
 /// `$CASE_NUMBER` (optional): The test case number to print on failure.
 macro_rules! assert_optimized {
-    ($EXPECTED_PLAN_LINES: expr, $EXPECTED_OPTIMIZED_PLAN_LINES: expr, $PLAN: expr, $REPARTITION_SORTS: expr $(, $CASE_NUMBER: expr)?) => {
+    (@ $EXPECTED_PLAN_LINES: literal $(,)?, @ $EXPECTED_OPTIMIZED_PLAN_LINES: literal $(,)?, $PLAN: expr, $REPARTITION_SORTS: expr $(, $CASE_NUMBER: expr)?) => {
         let mut config = ConfigOptions::new();
         config.optimizer.repartition_sorts = $REPARTITION_SORTS;
 
@@ -1703,32 +1703,42 @@ macro_rules! assert_optimized {
 
         let physical_plan = $PLAN;
         let formatted = displayable(physical_plan.as_ref()).indent(true).to_string();
-        let actual: Vec<&str> = formatted.trim().lines().collect();
-
-        let expected_plan_lines: Vec<&str> = $EXPECTED_PLAN_LINES
-            .iter().map(|s| *s).collect();
-
-        if expected_plan_lines != actual {
-            $(println!("\n**Original Plan Mismatch in case {}**", $CASE_NUMBER);)?
-            println!("\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", expected_plan_lines, actual);
-            assert_eq!(expected_plan_lines, actual);
+        let actual = formatted.trim();
+
+        // let expected_plan_lines: Vec<&str> = $EXPECTED_PLAN_LINES
+        //     .iter().map(|s| *s).collect();
+
+        // if expected_plan_lines != actual {
+        //     $(println!("\n**Original Plan Mismatch in case {}**", $CASE_NUMBER);)?
+        //     println!("\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", expected_plan_lines, actual);
+        //     assert_eq!(expected_plan_lines, actual);
+        // }
+        $(println!("\n**Testing Original Plan for case {}**", $CASE_NUMBER);)?
+         allow_duplicates! {
+            assert_snapshot!(actual, @$EXPECTED_PLAN_LINES);
         }
 
-        let expected_optimized_lines: Vec<&str> = $EXPECTED_OPTIMIZED_PLAN_LINES
-            .iter().map(|s| *s).collect();
+
+        // let expected_optimized_lines: Vec<&str> = $EXPECTED_OPTIMIZED_PLAN_LINES
+        //     .iter().map(|s| *s).collect();
 
         // Run the actual optimizer
         let optimized_physical_plan =
             EnforceSorting::new().optimize(physical_plan, &config)?;
 
         // Get string representation of the plan
-        let actual = get_plan_string(&optimized_physical_plan);
-        if expected_optimized_lines != actual {
-            $(println!("\n**Optimized Plan Mismatch in case {}**", $CASE_NUMBER);)?
-            println!("\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", expected_optimized_lines, actual);
-            assert_eq!(expected_optimized_lines, actual);
+        let plan_lines = get_plan_string(&optimized_physical_plan).join("\n");
+        let actual = plan_lines.trim();
+        // if expected_optimized_lines != actual {
+        //     $(println!("\n**Optimized Plan Mismatch in case {}**", $CASE_NUMBER);)?
+        //     println!("\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", expected_optimized_lines, actual);
+        //     assert_eq!(expected_optimized_lines, actual);
+        // }
+        $(println!("\n**Testing Original Plan for case {}**", $CASE_NUMBER);)?
+        allow_duplicates! {
+            assert_snapshot!(actual, @ $EXPECTED_OPTIMIZED_PLAN_LINES);
         }
-    };
+    }
 }
 
 #[tokio::test]
@@ -1747,7 +1757,7 @@ async fn test_remove_unnecessary_sort() -> Result<()> {
         "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]",
         "  DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -1824,7 +1834,7 @@ async fn test_add_required_sort() -> Result<()> {
         "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]",
         "  DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -1850,7 +1860,7 @@ async fn test_remove_unnecessary_sort1() -> Result<()> {
         "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]",
         "  DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -1888,7 +1898,7 @@ async fn test_remove_unnecessary_sort2() -> Result<()> {
         "  RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
         "    DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -1931,7 +1941,7 @@ async fn test_remove_unnecessary_sort3() -> Result<()> {
         "    RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
         "      DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -2014,7 +2024,7 @@ async fn test_remove_unnecessary_sort6() -> Result<()> {
         "SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]",
         "  DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -2047,7 +2057,7 @@ async fn test_remove_unnecessary_sort7() -> Result<()> {
         "  SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]",
         "    DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -2078,7 +2088,7 @@ async fn test_remove_unnecessary_sort8() -> Result<()> {
         "  SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]",
         "    DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -2103,7 +2113,7 @@ async fn test_do_not_pushdown_through_limit() -> Result<()> {
         "    SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]",
         "      DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -2128,7 +2138,7 @@ async fn test_remove_unnecessary_spm1() -> Result<()> {
         "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]",
         "  DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -2152,7 +2162,7 @@ async fn test_remove_unnecessary_spm2() -> Result<()> {
         "  SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]",
         "    DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    assert_optimized!(expected_input, expected_optimized, input, true);
+    // assert_optimized!(expected_input, expected_optimized, input, true);
 
     Ok(())
 }
@@ -2177,7 +2187,7 @@ async fn test_change_wrong_sorting() -> Result<()> {
         "SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]",
         "  DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -2204,7 +2214,7 @@ async fn test_change_wrong_sorting2() -> Result<()> {
         "SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]",
         "  DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -2326,7 +2336,7 @@ async fn test_coalesce_propagate() -> Result<()> {
         "    RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
         "      DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -2355,7 +2365,7 @@ async fn test_replace_with_partial_sort2() -> Result<()> {
         "PartialSortExec: expr=[a@0 ASC, c@2 ASC, d@3 ASC], common_prefix_length=[2]",
         "  StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC, c@2 ASC]",
     ];
-    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
     Ok(())
 }
 
@@ -2431,7 +2441,7 @@ async fn test_replace_with_partial_sort() -> Result<()> {
         "PartialSortExec: expr=[a@0 ASC, c@2 ASC], common_prefix_length=[1]",
         "  StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]",
     ];
-    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
     Ok(())
 }
 
@@ -3709,13 +3719,13 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> {
         let ordering = LexOrdering::new(sort_expr).unwrap();
         let physical_plan = sort_exec(ordering, window_exec);
 
-        assert_optimized!(
-            case.initial_plan,
-            case.expected_plan,
-            physical_plan,
-            true,
-            case_idx
-        );
+        // assert_optimized!(
+        //     case.initial_plan,
+        //     case.expected_plan,
+        //     physical_plan,
+        //     true,
+        //     case_idx
+        // );
     }
 
     Ok(())
@@ -3742,7 +3752,7 @@ fn test_removes_unused_orthogonal_sort() -> Result<()> {
     let expected_optimized = [
         "StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]"
     ];
-    assert_optimized!(expected_input, expected_optimized, output_sort, true);
+    // assert_optimized!(expected_input, expected_optimized, output_sort, true);
 
     Ok(())
 }
@@ -3767,7 +3777,7 @@ fn test_keeps_used_orthogonal_sort() -> Result<()> {
 
     // Test: should keep the orthogonal sort, since it modifies the output:
     let expected_optimized = expected_input;
-    assert_optimized!(expected_input, expected_optimized, output_sort, true);
+    // assert_optimized!(expected_input, expected_optimized, output_sort, true);
 
     Ok(())
 }
@@ -3804,7 +3814,7 @@ fn test_handles_multiple_orthogonal_sorts() -> Result<()> {
         "  SortExec: TopK(fetch=3), expr=[a@0 ASC], preserve_partitioning=[false]",
         "    StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]",
     ];
-    assert_optimized!(expected_input, expected_optimized, output_sort, true);
+    // assert_optimized!(expected_input, expected_optimized, output_sort, true);
 
     Ok(())
 }

From 6090b9c2ae7004555a669eba85dff4420bcd1a9f Mon Sep 17 00:00:00 2001
From: Ian Lai <Ian.Lai@senao.com>
Date: Tue, 17 Jun 2025 09:48:29 +0000
Subject: [PATCH 02/11] feat: modify assert_optimized macro to test one
 snapshot with a combined physical plan

---
 .../physical_optimizer/enforce_sorting.rs     | 212 +++++++++---------
 1 file changed, 108 insertions(+), 104 deletions(-)

diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
index 88a5d70313d79..c47cca5f2cb38 100644
--- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
+++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
@@ -17,6 +17,7 @@
 
 use insta::allow_duplicates;
 use insta::assert_snapshot;
+use insta::with_settings;
 use std::sync::Arc;
 
 use crate::physical_optimizer::test_utils::{
@@ -85,6 +86,90 @@ fn csv_exec_sorted(
     DataSourceExec::from_data_source(config)
 }
 
+/// Runs the sort enforcement optimizer and asserts the plan
+/// against the original and expected plans
+///
+/// `$PLAN`: the plan to optimized
+/// `$REPARTITION_SORTS`: Flag to set `config.options.optimizer.repartition_sorts` option.
+/// `$EXPECTED_PLAN_COMBINED`: The expected combined plan string with the original and optimized plans.
+///
+macro_rules! assert_optimized {
+    (
+        $PLAN: expr,
+        $REPARTITION_SORTS: expr,
+        @ $EXPECTED_PLAN_COMBINED: literal $(,)?
+    ) => {{
+        let mut config = ConfigOptions::new();
+        config.optimizer.repartition_sorts = $REPARTITION_SORTS;
+
+        // This file has 4 rules that use tree node, apply these rules as in the
+        // EnforceSorting::optimize implementation
+        // After these operations tree nodes should be in a consistent state.
+        // This code block makes sure that these rules doesn't violate tree node integrity.
+        let plan_requirements = PlanWithCorrespondingSort::new_default($PLAN.clone());
+        let adjusted = plan_requirements
+            .transform_up(ensure_sorting)
+            .data()
+            .and_then(check_integrity)?;
+        // TODO: End state payloads will be checked here.
+
+        let new_plan = if config.optimizer.repartition_sorts {
+            let plan_with_coalesce_partitions =
+                PlanWithCorrespondingCoalescePartitions::new_default(adjusted.plan);
+            let parallel = plan_with_coalesce_partitions
+                .transform_up(parallelize_sorts)
+                .data()
+                .and_then(check_integrity)?;
+            // TODO: End state payloads will be checked here.
+            parallel.plan
+        } else {
+            adjusted.plan
+        };
+
+        let plan_with_pipeline_fixer = OrderPreservationContext::new_default(new_plan);
+        let updated_plan = plan_with_pipeline_fixer
+            .transform_up(|plan_with_pipeline_fixer| {
+                replace_with_order_preserving_variants(
+                    plan_with_pipeline_fixer,
+                    false,
+                    true,
+                    &config,
+                )
+            })
+            .data()
+            .and_then(check_integrity)?;
+        // TODO: End state payloads will be checked here.
+
+        let mut sort_pushdown = SortPushDown::new_default(updated_plan.plan);
+        assign_initial_requirements(&mut sort_pushdown);
+        check_integrity(pushdown_sorts(sort_pushdown)?)?;
+        // TODO: End state payloads will be checked here.
+
+
+        let physical_plan = $PLAN;
+        let formatted = displayable(physical_plan.as_ref()).indent(true).to_string();
+        let actual = formatted.trim();
+
+        // Run the actual optimizer
+        let optimized_physical_plan =
+            EnforceSorting::new().optimize(physical_plan,&config)?;
+
+        // Get string representation of the plan
+        let plan_lines = get_plan_string(&optimized_physical_plan).join("\n");
+        let actual_optimized = plan_lines.trim();
+
+        let actual_combined = format!(
+            "=== BEFORE OPTIMIZATION ===\n\n{}\n\n=== AFTER OPTIMIZATION ===\n\n{}",
+            actual,
+            actual_optimized
+        );
+
+        assert_snapshot!(actual_combined, @ $EXPECTED_PLAN_COMBINED);
+
+
+    }};
+}
+
 /// Runs the sort enforcement optimizer and asserts the plan
 /// against the original and expected plans
 ///
@@ -92,9 +177,9 @@ fn csv_exec_sorted(
 /// `$EXPECTED_OPTIMIZED_PLAN_LINES`: optimized plan
 /// `$PLAN`: the plan to optimized
 /// `REPARTITION_SORTS`: Flag to set `config.options.optimizer.repartition_sorts` option.
-///
+/// `$CASE_NUMBER` (optional): The test case number to print on failure.
 macro_rules! assert_optimized {
-    (@ $EXPECTED_PLAN_LINES: literal $(,)?, @ $EXPECTED_OPTIMIZED_PLAN_LINES: literal $(,)?, $PLAN: expr, $REPARTITION_SORTS: expr) => {
+    (@ $EXPECTED_PLAN_LINES: literal $(,)?, @ $EXPECTED_OPTIMIZED_PLAN_LINES: literal $(,)?, $PLAN: expr, $REPARTITION_SORTS: expr $(, $CASE_NUMBER: expr)?) => {
         let mut config = ConfigOptions::new();
         config.optimizer.repartition_sorts = $REPARTITION_SORTS;
 
@@ -147,22 +232,39 @@ macro_rules! assert_optimized {
         let formatted = displayable(physical_plan.as_ref()).indent(true).to_string();
         let actual = formatted.trim();
 
-        allow_duplicates! {
+        // let expected_plan_lines: Vec<&str> = $EXPECTED_PLAN_LINES
+        //     .iter().map(|s| *s).collect();
+
+        // if expected_plan_lines != actual {
+        //     $(println!("\n**Original Plan Mismatch in case {}**", $CASE_NUMBER);)?
+        //     println!("\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", expected_plan_lines, actual);
+        //     assert_eq!(expected_plan_lines, actual);
+        // }
+        $(println!("\n**Testing Original Plan for case {}**", $CASE_NUMBER);)?
+         allow_duplicates! {
             assert_snapshot!(actual, @$EXPECTED_PLAN_LINES);
         }
 
+        // let expected_optimized_lines: Vec<&str> = $EXPECTED_OPTIMIZED_PLAN_LINES
+        //     .iter().map(|s| *s).collect();
+
         // Run the actual optimizer
         let optimized_physical_plan =
-            EnforceSorting::new().optimize(physical_plan,&config)?;
+            EnforceSorting::new().optimize(physical_plan, &config)?;
 
         // Get string representation of the plan
         let plan_lines = get_plan_string(&optimized_physical_plan).join("\n");
         let actual = plan_lines.trim();
-
+        // if expected_optimized_lines != actual {
+        //     $(println!("\n**Optimized Plan Mismatch in case {}**", $CASE_NUMBER);)?
+        //     println!("\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", expected_optimized_lines, actual);
+        //     assert_eq!(expected_optimized_lines, actual);
+        // }
+        $(println!("\n**Testing Original Plan for case {}**", $CASE_NUMBER);)?
         allow_duplicates! {
             assert_snapshot!(actual, @ $EXPECTED_OPTIMIZED_PLAN_LINES);
         }
-    };
+    }
 }
 
 #[tokio::test]
@@ -1643,104 +1745,6 @@ async fn test_not_replaced_with_partial_sort_for_bounded_input() -> Result<()> {
     Ok(())
 }
 
-/// Runs the sort enforcement optimizer and asserts the plan
-/// against the original and expected plans
-///
-/// `$EXPECTED_PLAN_LINES`: input plan
-/// `$EXPECTED_OPTIMIZED_PLAN_LINES`: optimized plan
-/// `$PLAN`: the plan to optimized
-/// `REPARTITION_SORTS`: Flag to set `config.options.optimizer.repartition_sorts` option.
-/// `$CASE_NUMBER` (optional): The test case number to print on failure.
-macro_rules! assert_optimized {
-    (@ $EXPECTED_PLAN_LINES: literal $(,)?, @ $EXPECTED_OPTIMIZED_PLAN_LINES: literal $(,)?, $PLAN: expr, $REPARTITION_SORTS: expr $(, $CASE_NUMBER: expr)?) => {
-        let mut config = ConfigOptions::new();
-        config.optimizer.repartition_sorts = $REPARTITION_SORTS;
-
-        // This file has 4 rules that use tree node, apply these rules as in the
-        // EnforceSorting::optimize implementation
-        // After these operations tree nodes should be in a consistent state.
-        // This code block makes sure that these rules doesn't violate tree node integrity.
-        {
-            let plan_requirements = PlanWithCorrespondingSort::new_default($PLAN.clone());
-            let adjusted = plan_requirements
-                .transform_up(ensure_sorting)
-                .data()
-                .and_then(check_integrity)?;
-            // TODO: End state payloads will be checked here.
-
-            let new_plan = if config.optimizer.repartition_sorts {
-                let plan_with_coalesce_partitions =
-                    PlanWithCorrespondingCoalescePartitions::new_default(adjusted.plan);
-                let parallel = plan_with_coalesce_partitions
-                    .transform_up(parallelize_sorts)
-                    .data()
-                    .and_then(check_integrity)?;
-                // TODO: End state payloads will be checked here.
-                parallel.plan
-            } else {
-                adjusted.plan
-            };
-
-            let plan_with_pipeline_fixer = OrderPreservationContext::new_default(new_plan);
-            let updated_plan = plan_with_pipeline_fixer
-                .transform_up(|plan_with_pipeline_fixer| {
-                    replace_with_order_preserving_variants(
-                        plan_with_pipeline_fixer,
-                        false,
-                        true,
-                        &config,
-                    )
-                })
-                .data()
-                .and_then(check_integrity)?;
-            // TODO: End state payloads will be checked here.
-
-            let mut sort_pushdown = SortPushDown::new_default(updated_plan.plan);
-            assign_initial_requirements(&mut sort_pushdown);
-            check_integrity(pushdown_sorts(sort_pushdown)?)?;
-            // TODO: End state payloads will be checked here.
-        }
-
-        let physical_plan = $PLAN;
-        let formatted = displayable(physical_plan.as_ref()).indent(true).to_string();
-        let actual = formatted.trim();
-
-        // let expected_plan_lines: Vec<&str> = $EXPECTED_PLAN_LINES
-        //     .iter().map(|s| *s).collect();
-
-        // if expected_plan_lines != actual {
-        //     $(println!("\n**Original Plan Mismatch in case {}**", $CASE_NUMBER);)?
-        //     println!("\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", expected_plan_lines, actual);
-        //     assert_eq!(expected_plan_lines, actual);
-        // }
-        $(println!("\n**Testing Original Plan for case {}**", $CASE_NUMBER);)?
-         allow_duplicates! {
-            assert_snapshot!(actual, @$EXPECTED_PLAN_LINES);
-        }
-
-
-        // let expected_optimized_lines: Vec<&str> = $EXPECTED_OPTIMIZED_PLAN_LINES
-        //     .iter().map(|s| *s).collect();
-
-        // Run the actual optimizer
-        let optimized_physical_plan =
-            EnforceSorting::new().optimize(physical_plan, &config)?;
-
-        // Get string representation of the plan
-        let plan_lines = get_plan_string(&optimized_physical_plan).join("\n");
-        let actual = plan_lines.trim();
-        // if expected_optimized_lines != actual {
-        //     $(println!("\n**Optimized Plan Mismatch in case {}**", $CASE_NUMBER);)?
-        //     println!("\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", expected_optimized_lines, actual);
-        //     assert_eq!(expected_optimized_lines, actual);
-        // }
-        $(println!("\n**Testing Original Plan for case {}**", $CASE_NUMBER);)?
-        allow_duplicates! {
-            assert_snapshot!(actual, @ $EXPECTED_OPTIMIZED_PLAN_LINES);
-        }
-    }
-}
-
 #[tokio::test]
 async fn test_remove_unnecessary_sort() -> Result<()> {
     let schema = create_test_schema()?;

From 1e2fe6177be877e92d04c757a63efeba30a49e92 Mon Sep 17 00:00:00 2001
From: Cheng-Yuan-Lai <a186235@g,ail.com>
Date: Fri, 20 Jun 2025 00:39:54 +0800
Subject: [PATCH 03/11] feat: update assert_optimized to support snapshot
 testing

---
 .../combine_partial_final_agg.rs              |  90 +++++----
 .../physical_optimizer/enforce_sorting.rs     | 180 +++++++++---------
 2 files changed, 141 insertions(+), 129 deletions(-)

diff --git a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs
index de8f7b36a3a70..77a67b6554e68 100644
--- a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs
+++ b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs
@@ -20,6 +20,7 @@
 //! Note these tests are not in the same module as the optimizer pass because
 //! they rely on `DataSourceExec` which is in the core crate.
 
+use insta::assert_snapshot;
 use std::sync::Arc;
 
 use crate::physical_optimizer::test_utils::{parquet_exec, trim_plan_display};
@@ -43,8 +44,8 @@ use datafusion_physical_plan::ExecutionPlan;
 
 /// Runs the CombinePartialFinalAggregate optimizer and asserts the plan against the expected
 macro_rules! assert_optimized {
-    ($EXPECTED_LINES: expr, $PLAN: expr) => {
-        let expected_lines: Vec<&str> = $EXPECTED_LINES.iter().map(|s| *s).collect();
+    ($PLAN: expr, @ $EXPECTED_LINES: literal $(,)?) => {
+        // let expected_lines: Vec<&str> = $EXPECTED_LINES.iter().map(|s| *s).collect();
 
         // run optimizer
         let optimizer = CombinePartialFinalAggregate {};
@@ -52,13 +53,16 @@ macro_rules! assert_optimized {
         let optimized = optimizer.optimize($PLAN, &config)?;
         // Now format correctly
         let plan = displayable(optimized.as_ref()).indent(true).to_string();
-        let actual_lines = trim_plan_display(&plan);
+        // let actual_lines = trim_plan_display(&plan);
+        let actual_lines = plan.trim();
 
-        assert_eq!(
-            &expected_lines, &actual_lines,
-            "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
-            expected_lines, actual_lines
-        );
+        // assert_eq!(
+        //     &expected_lines, &actual_lines,
+        //     "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
+        //     expected_lines, actual_lines
+        // );
+
+        assert_snapshot!(actual_lines, @ $EXPECTED_LINES);
     };
 }
 
@@ -144,13 +148,15 @@ fn aggregations_not_combined() -> datafusion_common::Result<()> {
         aggr_expr,
     );
     // should not combine the Partial/Final AggregateExecs
-    let expected = &[
-        "AggregateExec: mode=Final, gby=[], aggr=[COUNT(1)]",
-        "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
-        "AggregateExec: mode=Partial, gby=[], aggr=[COUNT(1)]",
-        "DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c], file_type=parquet",
-    ];
-    assert_optimized!(expected, plan);
+    assert_optimized!(
+        plan,
+        @ r"
+    AggregateExec: mode=Final, gby=[], aggr=[COUNT(1)]
+      RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
+        AggregateExec: mode=Partial, gby=[], aggr=[COUNT(1)]
+          DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c], file_type=parquet
+    "
+    );
 
     let aggr_expr1 = vec![count_expr(lit(1i8), "COUNT(1)", &schema)];
     let aggr_expr2 = vec![count_expr(lit(1i8), "COUNT(2)", &schema)];
@@ -165,13 +171,14 @@ fn aggregations_not_combined() -> datafusion_common::Result<()> {
         aggr_expr2,
     );
     // should not combine the Partial/Final AggregateExecs
-    let expected = &[
-        "AggregateExec: mode=Final, gby=[], aggr=[COUNT(2)]",
-        "AggregateExec: mode=Partial, gby=[], aggr=[COUNT(1)]",
-        "DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c], file_type=parquet",
-    ];
-
-    assert_optimized!(expected, plan);
+    assert_optimized!(
+        plan,
+        @ r"
+    AggregateExec: mode=Final, gby=[], aggr=[COUNT(2)]
+      AggregateExec: mode=Partial, gby=[], aggr=[COUNT(1)]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c], file_type=parquet
+    "
+    );
 
     Ok(())
 }
@@ -191,12 +198,13 @@ fn aggregations_combined() -> datafusion_common::Result<()> {
         aggr_expr,
     );
     // should combine the Partial/Final AggregateExecs to the Single AggregateExec
-    let expected = &[
-        "AggregateExec: mode=Single, gby=[], aggr=[COUNT(1)]",
-        "DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c], file_type=parquet",
-    ];
-
-    assert_optimized!(expected, plan);
+    assert_optimized!(
+        plan,
+        @ "
+    AggregateExec: mode=Single, gby=[], aggr=[COUNT(1)]
+      DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c], file_type=parquet
+    "
+    );
     Ok(())
 }
 
@@ -224,12 +232,13 @@ fn aggregations_with_group_combined() -> datafusion_common::Result<()> {
 
     let plan = final_aggregate_exec(partial_agg, final_group_by, aggr_expr);
     // should combine the Partial/Final AggregateExecs to the Single AggregateExec
-    let expected = &[
-        "AggregateExec: mode=Single, gby=[c@2 as c], aggr=[Sum(b)]",
-        "DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c], file_type=parquet",
-    ];
-
-    assert_optimized!(expected, plan);
+    assert_optimized!(
+        plan,
+        @ r"
+    AggregateExec: mode=Single, gby=[c@2 as c], aggr=[Sum(b)]
+      DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c], file_type=parquet
+    "
+    );
     Ok(())
 }
 
@@ -265,11 +274,12 @@ fn aggregations_with_limit_combined() -> datafusion_common::Result<()> {
     let plan: Arc<dyn ExecutionPlan> = final_agg;
     // should combine the Partial/Final AggregateExecs to a Single AggregateExec
     // with the final limit preserved
-    let expected = &[
-        "AggregateExec: mode=Single, gby=[c@2 as c], aggr=[], lim=[5]",
-        "DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c], file_type=parquet",
-    ];
-
-    assert_optimized!(expected, plan);
+    assert_optimized!(
+        plan,
+        @ r"
+    AggregateExec: mode=Single, gby=[c@2 as c], aggr=[], lim=[5]
+      DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c], file_type=parquet
+    "
+    );
     Ok(())
 }
diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
index c47cca5f2cb38..df563a4a4343b 100644
--- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
+++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
@@ -97,11 +97,13 @@ macro_rules! assert_optimized {
     (
         $PLAN: expr,
         $REPARTITION_SORTS: expr,
-        @ $EXPECTED_PLAN_COMBINED: literal $(,)?
+        @ $EXPECTED_PLAN_COMBINED: literal $(, $CASE_NUMBER: expr)? $(,)?
     ) => {{
         let mut config = ConfigOptions::new();
         config.optimizer.repartition_sorts = $REPARTITION_SORTS;
 
+        $(println!("\n**Testing case {}**", $CASE_NUMBER);)?
+
         // This file has 4 rules that use tree node, apply these rules as in the
         // EnforceSorting::optimize implementation
         // After these operations tree nodes should be in a consistent state.
@@ -178,94 +180,94 @@ macro_rules! assert_optimized {
 /// `$PLAN`: the plan to optimized
 /// `REPARTITION_SORTS`: Flag to set `config.options.optimizer.repartition_sorts` option.
 /// `$CASE_NUMBER` (optional): The test case number to print on failure.
-macro_rules! assert_optimized {
-    (@ $EXPECTED_PLAN_LINES: literal $(,)?, @ $EXPECTED_OPTIMIZED_PLAN_LINES: literal $(,)?, $PLAN: expr, $REPARTITION_SORTS: expr $(, $CASE_NUMBER: expr)?) => {
-        let mut config = ConfigOptions::new();
-        config.optimizer.repartition_sorts = $REPARTITION_SORTS;
-
-        // This file has 4 rules that use tree node, apply these rules as in the
-        // EnforceSorting::optimize implementation
-        // After these operations tree nodes should be in a consistent state.
-        // This code block makes sure that these rules doesn't violate tree node integrity.
-        {
-            let plan_requirements = PlanWithCorrespondingSort::new_default($PLAN.clone());
-            let adjusted = plan_requirements
-                .transform_up(ensure_sorting)
-                .data()
-                .and_then(check_integrity)?;
-            // TODO: End state payloads will be checked here.
-
-            let new_plan = if config.optimizer.repartition_sorts {
-                let plan_with_coalesce_partitions =
-                    PlanWithCorrespondingCoalescePartitions::new_default(adjusted.plan);
-                let parallel = plan_with_coalesce_partitions
-                    .transform_up(parallelize_sorts)
-                    .data()
-                    .and_then(check_integrity)?;
-                // TODO: End state payloads will be checked here.
-                parallel.plan
-            } else {
-                adjusted.plan
-            };
-
-            let plan_with_pipeline_fixer = OrderPreservationContext::new_default(new_plan);
-            let updated_plan = plan_with_pipeline_fixer
-                .transform_up(|plan_with_pipeline_fixer| {
-                    replace_with_order_preserving_variants(
-                        plan_with_pipeline_fixer,
-                        false,
-                        true,
-                        &config,
-                    )
-                })
-                .data()
-                .and_then(check_integrity)?;
-            // TODO: End state payloads will be checked here.
-
-            let mut sort_pushdown = SortPushDown::new_default(updated_plan.plan);
-            assign_initial_requirements(&mut sort_pushdown);
-            check_integrity(pushdown_sorts(sort_pushdown)?)?;
-            // TODO: End state payloads will be checked here.
-        }
-
-        let physical_plan = $PLAN;
-        let formatted = displayable(physical_plan.as_ref()).indent(true).to_string();
-        let actual = formatted.trim();
-
-        // let expected_plan_lines: Vec<&str> = $EXPECTED_PLAN_LINES
-        //     .iter().map(|s| *s).collect();
-
-        // if expected_plan_lines != actual {
-        //     $(println!("\n**Original Plan Mismatch in case {}**", $CASE_NUMBER);)?
-        //     println!("\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", expected_plan_lines, actual);
-        //     assert_eq!(expected_plan_lines, actual);
-        // }
-        $(println!("\n**Testing Original Plan for case {}**", $CASE_NUMBER);)?
-         allow_duplicates! {
-            assert_snapshot!(actual, @$EXPECTED_PLAN_LINES);
-        }
-
-        // let expected_optimized_lines: Vec<&str> = $EXPECTED_OPTIMIZED_PLAN_LINES
-        //     .iter().map(|s| *s).collect();
-
-        // Run the actual optimizer
-        let optimized_physical_plan =
-            EnforceSorting::new().optimize(physical_plan, &config)?;
-
-        // Get string representation of the plan
-        let plan_lines = get_plan_string(&optimized_physical_plan).join("\n");
-        let actual = plan_lines.trim();
-        // if expected_optimized_lines != actual {
-        //     $(println!("\n**Optimized Plan Mismatch in case {}**", $CASE_NUMBER);)?
-        //     println!("\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", expected_optimized_lines, actual);
-        //     assert_eq!(expected_optimized_lines, actual);
-        // }
-        $(println!("\n**Testing Original Plan for case {}**", $CASE_NUMBER);)?
-        allow_duplicates! {
-            assert_snapshot!(actual, @ $EXPECTED_OPTIMIZED_PLAN_LINES);
-        }
-    }
-}
+// macro_rules! assert_optimized {
+//     (@ $EXPECTED_PLAN_LINES: literal $(,)?, @ $EXPECTED_OPTIMIZED_PLAN_LINES: literal $(,)?, $PLAN: expr, $REPARTITION_SORTS: expr $(, $CASE_NUMBER: expr)?) => {
+//         let mut config = ConfigOptions::new();
+//         config.optimizer.repartition_sorts = $REPARTITION_SORTS;
+
+//         // This file has 4 rules that use tree node, apply these rules as in the
+//         // EnforceSorting::optimize implementation
+//         // After these operations tree nodes should be in a consistent state.
+//         // This code block makes sure that these rules doesn't violate tree node integrity.
+//         {
+//             let plan_requirements = PlanWithCorrespondingSort::new_default($PLAN.clone());
+//             let adjusted = plan_requirements
+//                 .transform_up(ensure_sorting)
+//                 .data()
+//                 .and_then(check_integrity)?;
+//             // TODO: End state payloads will be checked here.
+
+//             let new_plan = if config.optimizer.repartition_sorts {
+//                 let plan_with_coalesce_partitions =
+//                     PlanWithCorrespondingCoalescePartitions::new_default(adjusted.plan);
+//                 let parallel = plan_with_coalesce_partitions
+//                     .transform_up(parallelize_sorts)
+//                     .data()
+//                     .and_then(check_integrity)?;
+//                 // TODO: End state payloads will be checked here.
+//                 parallel.plan
+//             } else {
+//                 adjusted.plan
+//             };
+
+//             let plan_with_pipeline_fixer = OrderPreservationContext::new_default(new_plan);
+//             let updated_plan = plan_with_pipeline_fixer
+//                 .transform_up(|plan_with_pipeline_fixer| {
+//                     replace_with_order_preserving_variants(
+//                         plan_with_pipeline_fixer,
+//                         false,
+//                         true,
+//                         &config,
+//                     )
+//                 })
+//                 .data()
+//                 .and_then(check_integrity)?;
+//             // TODO: End state payloads will be checked here.
+
+//             let mut sort_pushdown = SortPushDown::new_default(updated_plan.plan);
+//             assign_initial_requirements(&mut sort_pushdown);
+//             check_integrity(pushdown_sorts(sort_pushdown)?)?;
+//             // TODO: End state payloads will be checked here.
+//         }
+
+//         let physical_plan = $PLAN;
+//         let formatted = displayable(physical_plan.as_ref()).indent(true).to_string();
+//         let actual = formatted.trim();
+
+//         // let expected_plan_lines: Vec<&str> = $EXPECTED_PLAN_LINES
+//         //     .iter().map(|s| *s).collect();
+
+//         // if expected_plan_lines != actual {
+//         //     $(println!("\n**Original Plan Mismatch in case {}**", $CASE_NUMBER);)?
+//         //     println!("\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", expected_plan_lines, actual);
+//         //     assert_eq!(expected_plan_lines, actual);
+//         // }
+//         $(println!("\n**Testing Original Plan for case {}**", $CASE_NUMBER);)?
+//          allow_duplicates! {
+//             assert_snapshot!(actual, @$EXPECTED_PLAN_LINES);
+//         }
+
+//         // let expected_optimized_lines: Vec<&str> = $EXPECTED_OPTIMIZED_PLAN_LINES
+//         //     .iter().map(|s| *s).collect();
+
+//         // Run the actual optimizer
+//         let optimized_physical_plan =
+//             EnforceSorting::new().optimize(physical_plan, &config)?;
+
+//         // Get string representation of the plan
+//         let plan_lines = get_plan_string(&optimized_physical_plan).join("\n");
+//         let actual = plan_lines.trim();
+//         // if expected_optimized_lines != actual {
+//         //     $(println!("\n**Optimized Plan Mismatch in case {}**", $CASE_NUMBER);)?
+//         //     println!("\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", expected_optimized_lines, actual);
+//         //     assert_eq!(expected_optimized_lines, actual);
+//         // }
+//         $(println!("\n**Testing Original Plan for case {}**", $CASE_NUMBER);)?
+//         allow_duplicates! {
+//             assert_snapshot!(actual, @ $EXPECTED_OPTIMIZED_PLAN_LINES);
+//         }
+//     }
+// }
 
 #[tokio::test]
 async fn test_remove_unnecessary_sort5() -> Result<()> {

From e0d6fdcb3b406262c28d05cabd60dae431b52c8e Mon Sep 17 00:00:00 2001
From: Cheng-Yuan-Lai <a186235@g,ail.com>
Date: Tue, 24 Jun 2025 00:44:52 +0800
Subject: [PATCH 04/11] Revert "feat: replace snapshot tests for
 enforce_sorting"

This reverts commit 8c921fabc6722669682c8306ab283c2d5bd1ccba.
---
 .../physical_optimizer/enforce_sorting.rs     | 305 +++++++++---------
 1 file changed, 147 insertions(+), 158 deletions(-)

diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
index df563a4a4343b..eecb772ba264d 100644
--- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
+++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
@@ -15,9 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use insta::allow_duplicates;
-use insta::assert_snapshot;
-use insta::with_settings;
 use std::sync::Arc;
 
 use crate::physical_optimizer::test_utils::{
@@ -94,11 +91,7 @@ fn csv_exec_sorted(
 /// `$EXPECTED_PLAN_COMBINED`: The expected combined plan string with the original and optimized plans.
 ///
 macro_rules! assert_optimized {
-    (
-        $PLAN: expr,
-        $REPARTITION_SORTS: expr,
-        @ $EXPECTED_PLAN_COMBINED: literal $(, $CASE_NUMBER: expr)? $(,)?
-    ) => {{
+    ($EXPECTED_PLAN_LINES: expr, $EXPECTED_OPTIMIZED_PLAN_LINES: expr, $PLAN: expr, $REPARTITION_SORTS: expr) => {
         let mut config = ConfigOptions::new();
         config.optimizer.repartition_sorts = $REPARTITION_SORTS;
 
@@ -150,125 +143,33 @@ macro_rules! assert_optimized {
 
         let physical_plan = $PLAN;
         let formatted = displayable(physical_plan.as_ref()).indent(true).to_string();
-        let actual = formatted.trim();
+        let actual: Vec<&str> = formatted.trim().lines().collect();
+
+        let expected_plan_lines: Vec<&str> = $EXPECTED_PLAN_LINES
+            .iter().map(|s| *s).collect();
+
+        assert_eq!(
+            expected_plan_lines, actual,
+            "\n**Original Plan Mismatch\n\nexpected:\n\n{expected_plan_lines:#?}\nactual:\n\n{actual:#?}\n\n"
+        );
+
+        let expected_optimized_lines: Vec<&str> = $EXPECTED_OPTIMIZED_PLAN_LINES
+            .iter().map(|s| *s).collect();
 
         // Run the actual optimizer
         let optimized_physical_plan =
             EnforceSorting::new().optimize(physical_plan,&config)?;
 
         // Get string representation of the plan
-        let plan_lines = get_plan_string(&optimized_physical_plan).join("\n");
-        let actual_optimized = plan_lines.trim();
-
-        let actual_combined = format!(
-            "=== BEFORE OPTIMIZATION ===\n\n{}\n\n=== AFTER OPTIMIZATION ===\n\n{}",
-            actual,
-            actual_optimized
+        let actual = get_plan_string(&optimized_physical_plan);
+        assert_eq!(
+            expected_optimized_lines, actual,
+            "\n**Optimized Plan Mismatch\n\nexpected:\n\n{expected_optimized_lines:#?}\nactual:\n\n{actual:#?}\n\n"
         );
 
-        assert_snapshot!(actual_combined, @ $EXPECTED_PLAN_COMBINED);
-
-
-    }};
+    };
 }
 
-/// Runs the sort enforcement optimizer and asserts the plan
-/// against the original and expected plans
-///
-/// `$EXPECTED_PLAN_LINES`: input plan
-/// `$EXPECTED_OPTIMIZED_PLAN_LINES`: optimized plan
-/// `$PLAN`: the plan to optimized
-/// `REPARTITION_SORTS`: Flag to set `config.options.optimizer.repartition_sorts` option.
-/// `$CASE_NUMBER` (optional): The test case number to print on failure.
-// macro_rules! assert_optimized {
-//     (@ $EXPECTED_PLAN_LINES: literal $(,)?, @ $EXPECTED_OPTIMIZED_PLAN_LINES: literal $(,)?, $PLAN: expr, $REPARTITION_SORTS: expr $(, $CASE_NUMBER: expr)?) => {
-//         let mut config = ConfigOptions::new();
-//         config.optimizer.repartition_sorts = $REPARTITION_SORTS;
-
-//         // This file has 4 rules that use tree node, apply these rules as in the
-//         // EnforceSorting::optimize implementation
-//         // After these operations tree nodes should be in a consistent state.
-//         // This code block makes sure that these rules doesn't violate tree node integrity.
-//         {
-//             let plan_requirements = PlanWithCorrespondingSort::new_default($PLAN.clone());
-//             let adjusted = plan_requirements
-//                 .transform_up(ensure_sorting)
-//                 .data()
-//                 .and_then(check_integrity)?;
-//             // TODO: End state payloads will be checked here.
-
-//             let new_plan = if config.optimizer.repartition_sorts {
-//                 let plan_with_coalesce_partitions =
-//                     PlanWithCorrespondingCoalescePartitions::new_default(adjusted.plan);
-//                 let parallel = plan_with_coalesce_partitions
-//                     .transform_up(parallelize_sorts)
-//                     .data()
-//                     .and_then(check_integrity)?;
-//                 // TODO: End state payloads will be checked here.
-//                 parallel.plan
-//             } else {
-//                 adjusted.plan
-//             };
-
-//             let plan_with_pipeline_fixer = OrderPreservationContext::new_default(new_plan);
-//             let updated_plan = plan_with_pipeline_fixer
-//                 .transform_up(|plan_with_pipeline_fixer| {
-//                     replace_with_order_preserving_variants(
-//                         plan_with_pipeline_fixer,
-//                         false,
-//                         true,
-//                         &config,
-//                     )
-//                 })
-//                 .data()
-//                 .and_then(check_integrity)?;
-//             // TODO: End state payloads will be checked here.
-
-//             let mut sort_pushdown = SortPushDown::new_default(updated_plan.plan);
-//             assign_initial_requirements(&mut sort_pushdown);
-//             check_integrity(pushdown_sorts(sort_pushdown)?)?;
-//             // TODO: End state payloads will be checked here.
-//         }
-
-//         let physical_plan = $PLAN;
-//         let formatted = displayable(physical_plan.as_ref()).indent(true).to_string();
-//         let actual = formatted.trim();
-
-//         // let expected_plan_lines: Vec<&str> = $EXPECTED_PLAN_LINES
-//         //     .iter().map(|s| *s).collect();
-
-//         // if expected_plan_lines != actual {
-//         //     $(println!("\n**Original Plan Mismatch in case {}**", $CASE_NUMBER);)?
-//         //     println!("\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", expected_plan_lines, actual);
-//         //     assert_eq!(expected_plan_lines, actual);
-//         // }
-//         $(println!("\n**Testing Original Plan for case {}**", $CASE_NUMBER);)?
-//          allow_duplicates! {
-//             assert_snapshot!(actual, @$EXPECTED_PLAN_LINES);
-//         }
-
-//         // let expected_optimized_lines: Vec<&str> = $EXPECTED_OPTIMIZED_PLAN_LINES
-//         //     .iter().map(|s| *s).collect();
-
-//         // Run the actual optimizer
-//         let optimized_physical_plan =
-//             EnforceSorting::new().optimize(physical_plan, &config)?;
-
-//         // Get string representation of the plan
-//         let plan_lines = get_plan_string(&optimized_physical_plan).join("\n");
-//         let actual = plan_lines.trim();
-//         // if expected_optimized_lines != actual {
-//         //     $(println!("\n**Optimized Plan Mismatch in case {}**", $CASE_NUMBER);)?
-//         //     println!("\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", expected_optimized_lines, actual);
-//         //     assert_eq!(expected_optimized_lines, actual);
-//         // }
-//         $(println!("\n**Testing Original Plan for case {}**", $CASE_NUMBER);)?
-//         allow_duplicates! {
-//             assert_snapshot!(actual, @ $EXPECTED_OPTIMIZED_PLAN_LINES);
-//         }
-//     }
-// }
-
 #[tokio::test]
 async fn test_remove_unnecessary_sort5() -> Result<()> {
     let left_schema = create_test_schema2()?;
@@ -1402,7 +1303,7 @@ async fn test_sort_merge_join_order_by_right() -> Result<()> {
                 ]
             }
         };
-        // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+        assert_optimized!(expected_input, expected_optimized, physical_plan, true);
     }
     Ok(())
 }
@@ -1618,18 +1519,18 @@ async fn test_with_lost_ordering_unbounded_bounded(
                 expected_optimized_bounded_parallelize_sort,
             )
         };
-    // assert_optimized!(
-    //     expected_input,
-    //     expected_optimized,
-    //     physical_plan.clone(),
-    //     false
-    // );
-    // assert_optimized!(
-    //     expected_input,
-    //     expected_optimized_sort_parallelize,
-    //     physical_plan,
-    //     true
-    // );
+    assert_optimized!(
+        expected_input,
+        expected_optimized,
+        physical_plan.clone(),
+        false
+    );
+    assert_optimized!(
+        expected_input,
+        expected_optimized_sort_parallelize,
+        physical_plan,
+        true
+    );
 
     Ok(())
 }
@@ -1720,7 +1621,7 @@ async fn test_window_multi_layer_requirement() -> Result<()> {
         "        RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
         "          DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
     ];
-    // assert_optimized!(expected_input, expected_optimized, physical_plan, false);
+    assert_optimized!(expected_input, expected_optimized, physical_plan, false);
 
     Ok(())
 }
@@ -1743,10 +1644,98 @@ async fn test_not_replaced_with_partial_sort_for_bounded_input() -> Result<()> {
         "  DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[b@1 ASC, c@2 ASC], file_type=parquet"
     ];
     let expected_no_change = expected_input;
-    // assert_optimized!(expected_input, expected_no_change, physical_plan, false);
+    assert_optimized!(expected_input, expected_no_change, physical_plan, false);
     Ok(())
 }
 
+/// Runs the sort enforcement optimizer and asserts the plan
+/// against the original and expected plans
+///
+/// `$EXPECTED_PLAN_LINES`: input plan
+/// `$EXPECTED_OPTIMIZED_PLAN_LINES`: optimized plan
+/// `$PLAN`: the plan to optimized
+/// `REPARTITION_SORTS`: Flag to set `config.options.optimizer.repartition_sorts` option.
+/// `$CASE_NUMBER` (optional): The test case number to print on failure.
+macro_rules! assert_optimized {
+    ($EXPECTED_PLAN_LINES: expr, $EXPECTED_OPTIMIZED_PLAN_LINES: expr, $PLAN: expr, $REPARTITION_SORTS: expr $(, $CASE_NUMBER: expr)?) => {
+        let mut config = ConfigOptions::new();
+        config.optimizer.repartition_sorts = $REPARTITION_SORTS;
+
+        // This file has 4 rules that use tree node, apply these rules as in the
+        // EnforceSorting::optimize implementation
+        // After these operations tree nodes should be in a consistent state.
+        // This code block makes sure that these rules doesn't violate tree node integrity.
+        {
+            let plan_requirements = PlanWithCorrespondingSort::new_default($PLAN.clone());
+            let adjusted = plan_requirements
+                .transform_up(ensure_sorting)
+                .data()
+                .and_then(check_integrity)?;
+            // TODO: End state payloads will be checked here.
+
+            let new_plan = if config.optimizer.repartition_sorts {
+                let plan_with_coalesce_partitions =
+                    PlanWithCorrespondingCoalescePartitions::new_default(adjusted.plan);
+                let parallel = plan_with_coalesce_partitions
+                    .transform_up(parallelize_sorts)
+                    .data()
+                    .and_then(check_integrity)?;
+                // TODO: End state payloads will be checked here.
+                parallel.plan
+            } else {
+                adjusted.plan
+            };
+
+            let plan_with_pipeline_fixer = OrderPreservationContext::new_default(new_plan);
+            let updated_plan = plan_with_pipeline_fixer
+                .transform_up(|plan_with_pipeline_fixer| {
+                    replace_with_order_preserving_variants(
+                        plan_with_pipeline_fixer,
+                        false,
+                        true,
+                        &config,
+                    )
+                })
+                .data()
+                .and_then(check_integrity)?;
+            // TODO: End state payloads will be checked here.
+
+            let mut sort_pushdown = SortPushDown::new_default(updated_plan.plan);
+            assign_initial_requirements(&mut sort_pushdown);
+            check_integrity(pushdown_sorts(sort_pushdown)?)?;
+            // TODO: End state payloads will be checked here.
+        }
+
+        let physical_plan = $PLAN;
+        let formatted = displayable(physical_plan.as_ref()).indent(true).to_string();
+        let actual: Vec<&str> = formatted.trim().lines().collect();
+
+        let expected_plan_lines: Vec<&str> = $EXPECTED_PLAN_LINES
+            .iter().map(|s| *s).collect();
+
+        if expected_plan_lines != actual {
+            $(println!("\n**Original Plan Mismatch in case {}**", $CASE_NUMBER);)?
+            println!("\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", expected_plan_lines, actual);
+            assert_eq!(expected_plan_lines, actual);
+        }
+
+        let expected_optimized_lines: Vec<&str> = $EXPECTED_OPTIMIZED_PLAN_LINES
+            .iter().map(|s| *s).collect();
+
+        // Run the actual optimizer
+        let optimized_physical_plan =
+            EnforceSorting::new().optimize(physical_plan, &config)?;
+
+        // Get string representation of the plan
+        let actual = get_plan_string(&optimized_physical_plan);
+        if expected_optimized_lines != actual {
+            $(println!("\n**Optimized Plan Mismatch in case {}**", $CASE_NUMBER);)?
+            println!("\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", expected_optimized_lines, actual);
+            assert_eq!(expected_optimized_lines, actual);
+        }
+    };
+}
+
 #[tokio::test]
 async fn test_remove_unnecessary_sort() -> Result<()> {
     let schema = create_test_schema()?;
@@ -1763,7 +1752,7 @@ async fn test_remove_unnecessary_sort() -> Result<()> {
         "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]",
         "  DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -1840,7 +1829,7 @@ async fn test_add_required_sort() -> Result<()> {
         "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]",
         "  DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -1866,7 +1855,7 @@ async fn test_remove_unnecessary_sort1() -> Result<()> {
         "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]",
         "  DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -1904,7 +1893,7 @@ async fn test_remove_unnecessary_sort2() -> Result<()> {
         "  RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
         "    DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -1947,7 +1936,7 @@ async fn test_remove_unnecessary_sort3() -> Result<()> {
         "    RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
         "      DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -2030,7 +2019,7 @@ async fn test_remove_unnecessary_sort6() -> Result<()> {
         "SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]",
         "  DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -2063,7 +2052,7 @@ async fn test_remove_unnecessary_sort7() -> Result<()> {
         "  SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]",
         "    DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -2094,7 +2083,7 @@ async fn test_remove_unnecessary_sort8() -> Result<()> {
         "  SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]",
         "    DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -2119,7 +2108,7 @@ async fn test_do_not_pushdown_through_limit() -> Result<()> {
         "    SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]",
         "      DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -2144,7 +2133,7 @@ async fn test_remove_unnecessary_spm1() -> Result<()> {
         "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]",
         "  DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -2168,7 +2157,7 @@ async fn test_remove_unnecessary_spm2() -> Result<()> {
         "  SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]",
         "    DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    // assert_optimized!(expected_input, expected_optimized, input, true);
+    assert_optimized!(expected_input, expected_optimized, input, true);
 
     Ok(())
 }
@@ -2193,7 +2182,7 @@ async fn test_change_wrong_sorting() -> Result<()> {
         "SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]",
         "  DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -2220,7 +2209,7 @@ async fn test_change_wrong_sorting2() -> Result<()> {
         "SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]",
         "  DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -2342,7 +2331,7 @@ async fn test_coalesce_propagate() -> Result<()> {
         "    RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
         "      DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
-    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
 
     Ok(())
 }
@@ -2371,7 +2360,7 @@ async fn test_replace_with_partial_sort2() -> Result<()> {
         "PartialSortExec: expr=[a@0 ASC, c@2 ASC, d@3 ASC], common_prefix_length=[2]",
         "  StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC, c@2 ASC]",
     ];
-    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
     Ok(())
 }
 
@@ -2447,7 +2436,7 @@ async fn test_replace_with_partial_sort() -> Result<()> {
         "PartialSortExec: expr=[a@0 ASC, c@2 ASC], common_prefix_length=[1]",
         "  StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]",
     ];
-    // assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+    assert_optimized!(expected_input, expected_optimized, physical_plan, true);
     Ok(())
 }
 
@@ -3725,13 +3714,13 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> {
         let ordering = LexOrdering::new(sort_expr).unwrap();
         let physical_plan = sort_exec(ordering, window_exec);
 
-        // assert_optimized!(
-        //     case.initial_plan,
-        //     case.expected_plan,
-        //     physical_plan,
-        //     true,
-        //     case_idx
-        // );
+        assert_optimized!(
+            case.initial_plan,
+            case.expected_plan,
+            physical_plan,
+            true,
+            case_idx
+        );
     }
 
     Ok(())
@@ -3758,7 +3747,7 @@ fn test_removes_unused_orthogonal_sort() -> Result<()> {
     let expected_optimized = [
         "StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]"
     ];
-    // assert_optimized!(expected_input, expected_optimized, output_sort, true);
+    assert_optimized!(expected_input, expected_optimized, output_sort, true);
 
     Ok(())
 }
@@ -3783,7 +3772,7 @@ fn test_keeps_used_orthogonal_sort() -> Result<()> {
 
     // Test: should keep the orthogonal sort, since it modifies the output:
     let expected_optimized = expected_input;
-    // assert_optimized!(expected_input, expected_optimized, output_sort, true);
+    assert_optimized!(expected_input, expected_optimized, output_sort, true);
 
     Ok(())
 }
@@ -3820,7 +3809,7 @@ fn test_handles_multiple_orthogonal_sorts() -> Result<()> {
         "  SortExec: TopK(fetch=3), expr=[a@0 ASC], preserve_partitioning=[false]",
         "    StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]",
     ];
-    // assert_optimized!(expected_input, expected_optimized, output_sort, true);
+    assert_optimized!(expected_input, expected_optimized, output_sort, true);
 
     Ok(())
 }

From 18644f5d0808aff67b4e4a661930900421ff992a Mon Sep 17 00:00:00 2001
From: Ian Lai <Ian.Lai@senao.com>
Date: Mon, 30 Jun 2025 07:16:14 +0000
Subject: [PATCH 05/11] feat: migrate core test to insta

---
 .../combine_partial_final_agg.rs              |   9 +-
 .../physical_optimizer/enforce_sorting.rs     |  77 ++-
 .../limited_distinct_aggregation.rs           | 255 +++++----
 .../physical_optimizer/projection_pushdown.rs | 522 +++++++++++-------
 .../physical_optimizer/sanity_checker.rs      | 151 ++---
 .../tests/physical_optimizer/test_utils.rs    |  21 +-
 datafusion/core/tests/sql/joins.rs            | 109 ++--
 7 files changed, 646 insertions(+), 498 deletions(-)

diff --git a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs
index 77a67b6554e68..7e31600cdc4d5 100644
--- a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs
+++ b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs
@@ -23,7 +23,7 @@
 use insta::assert_snapshot;
 use std::sync::Arc;
 
-use crate::physical_optimizer::test_utils::{parquet_exec, trim_plan_display};
+use crate::physical_optimizer::test_utils::parquet_exec;
 
 use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
 use datafusion_common::config::ConfigOptions;
@@ -53,15 +53,8 @@ macro_rules! assert_optimized {
         let optimized = optimizer.optimize($PLAN, &config)?;
         // Now format correctly
         let plan = displayable(optimized.as_ref()).indent(true).to_string();
-        // let actual_lines = trim_plan_display(&plan);
         let actual_lines = plan.trim();
 
-        // assert_eq!(
-        //     &expected_lines, &actual_lines,
-        //     "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
-        //     expected_lines, actual_lines
-        // );
-
         assert_snapshot!(actual_lines, @ $EXPECTED_LINES);
     };
 }
diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
index eecb772ba264d..a6e7cded15b03 100644
--- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
+++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
@@ -95,50 +95,50 @@ macro_rules! assert_optimized {
         let mut config = ConfigOptions::new();
         config.optimizer.repartition_sorts = $REPARTITION_SORTS;
 
-        $(println!("\n**Testing case {}**", $CASE_NUMBER);)?
-
         // This file has 4 rules that use tree node, apply these rules as in the
         // EnforceSorting::optimize implementation
         // After these operations tree nodes should be in a consistent state.
         // This code block makes sure that these rules doesn't violate tree node integrity.
-        let plan_requirements = PlanWithCorrespondingSort::new_default($PLAN.clone());
-        let adjusted = plan_requirements
-            .transform_up(ensure_sorting)
-            .data()
-            .and_then(check_integrity)?;
-        // TODO: End state payloads will be checked here.
-
-        let new_plan = if config.optimizer.repartition_sorts {
-            let plan_with_coalesce_partitions =
-                PlanWithCorrespondingCoalescePartitions::new_default(adjusted.plan);
-            let parallel = plan_with_coalesce_partitions
-                .transform_up(parallelize_sorts)
+        {
+            let plan_requirements = PlanWithCorrespondingSort::new_default($PLAN.clone());
+            let adjusted = plan_requirements
+                .transform_up(ensure_sorting)
                 .data()
                 .and_then(check_integrity)?;
             // TODO: End state payloads will be checked here.
-            parallel.plan
-        } else {
-            adjusted.plan
-        };
 
-        let plan_with_pipeline_fixer = OrderPreservationContext::new_default(new_plan);
-        let updated_plan = plan_with_pipeline_fixer
-            .transform_up(|plan_with_pipeline_fixer| {
-                replace_with_order_preserving_variants(
-                    plan_with_pipeline_fixer,
-                    false,
-                    true,
-                    &config,
-                )
-            })
-            .data()
-            .and_then(check_integrity)?;
-        // TODO: End state payloads will be checked here.
+            let new_plan = if config.optimizer.repartition_sorts {
+                let plan_with_coalesce_partitions =
+                    PlanWithCorrespondingCoalescePartitions::new_default(adjusted.plan);
+                let parallel = plan_with_coalesce_partitions
+                    .transform_up(parallelize_sorts)
+                    .data()
+                    .and_then(check_integrity)?;
+                // TODO: End state payloads will be checked here.
+                parallel.plan
+            } else {
+                adjusted.plan
+            };
+
+            let plan_with_pipeline_fixer = OrderPreservationContext::new_default(new_plan);
+            let updated_plan = plan_with_pipeline_fixer
+                .transform_up(|plan_with_pipeline_fixer| {
+                    replace_with_order_preserving_variants(
+                        plan_with_pipeline_fixer,
+                        false,
+                        true,
+                        &config,
+                    )
+                })
+                .data()
+                .and_then(check_integrity)?;
+            // TODO: End state payloads will be checked here.
 
-        let mut sort_pushdown = SortPushDown::new_default(updated_plan.plan);
-        assign_initial_requirements(&mut sort_pushdown);
-        check_integrity(pushdown_sorts(sort_pushdown)?)?;
-        // TODO: End state payloads will be checked here.
+            let mut sort_pushdown = SortPushDown::new_default(updated_plan.plan);
+            assign_initial_requirements(&mut sort_pushdown);
+            check_integrity(pushdown_sorts(sort_pushdown)?)?;
+            // TODO: End state payloads will be checked here.
+        }
 
 
         let physical_plan = $PLAN;
@@ -1225,12 +1225,7 @@ async fn test_sort_merge_join_order_by_left() -> Result<()> {
                 ]
             }
         };
-        assert_optimized!(
-            @ "",
-            @ "",
-            physical_plan,
-            true
-        );
+        assert_optimized!(expected_input, expected_optimized, physical_plan, true);
     }
     Ok(())
 }
diff --git a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs
index 409d392f7819e..8e4f25cb11dff 100644
--- a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs
+++ b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs
@@ -17,11 +17,12 @@
 
 //! Integration tests for [`LimitedDistinctAggregation`] physical optimizer rule
 
+use insta::assert_snapshot;
 use std::sync::Arc;
 
 use crate::physical_optimizer::test_utils::{
-    assert_plan_matches_expected, build_group_by, mock_data, parquet_exec_with_sort,
-    schema, TestAggregate,
+    build_group_by, mock_data, parquet_exec_with_sort, plan_matches_expected, schema,
+    TestAggregate,
 };
 
 use arrow::datatypes::DataType;
@@ -39,16 +40,12 @@ use datafusion_physical_plan::{
     ExecutionPlan,
 };
 
-async fn assert_results_match_expected(
-    plan: Arc<dyn ExecutionPlan>,
-    expected: &str,
-) -> Result<()> {
+async fn assert_results_match_expected(plan: Arc<dyn ExecutionPlan>) -> Result<String> {
     let cfg = SessionConfig::new().with_target_partitions(1);
     let ctx = SessionContext::new_with_config(cfg);
     let batches = collect(plan, ctx.task_ctx()).await?;
     let actual = format!("{}", pretty_format_batches(&batches)?);
-    assert_eq!(actual, expected);
-    Ok(())
+    Ok(actual)
 }
 
 #[tokio::test]
@@ -77,27 +74,34 @@ async fn test_partial_final() -> Result<()> {
         Arc::new(final_agg),
         4, // fetch
     );
-    // expected to push the limit to the Partial and Final AggregateExecs
-    let expected = [
-        "LocalLimitExec: fetch=4",
-        "AggregateExec: mode=Final, gby=[a@0 as a], aggr=[], lim=[4]",
-        "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[], lim=[4]",
-        "DataSourceExec: partitions=1, partition_sizes=[1]",
-    ];
     let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
-    assert_plan_matches_expected(&plan, &expected)?;
-    let expected = r#"
-+---+
-| a |
-+---+
-| 1 |
-| 2 |
-|   |
-| 4 |
-+---+
-"#
-    .trim();
-    assert_results_match_expected(plan, expected).await?;
+    let formatted = plan_matches_expected(&plan)?;
+    let actual = formatted.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    LocalLimitExec: fetch=4
+      AggregateExec: mode=Final, gby=[a@0 as a], aggr=[], lim=[4]
+        AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[], lim=[4]
+          DataSourceExec: partitions=1, partition_sizes=[1]
+    "
+    );
+    let expected = assert_results_match_expected(plan).await?;
+    let actual = expected.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    +---+
+    | a |
+    +---+
+    | 1 |
+    | 2 |
+    |   |
+    | 4 |
+    +---+
+    "
+    );
+
     Ok(())
 }
 
@@ -120,25 +124,32 @@ async fn test_single_local() -> Result<()> {
         4, // fetch
     );
     // expected to push the limit to the AggregateExec
-    let expected = [
-        "LocalLimitExec: fetch=4",
-        "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], lim=[4]",
-        "DataSourceExec: partitions=1, partition_sizes=[1]",
-    ];
     let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
-    assert_plan_matches_expected(&plan, &expected)?;
-    let expected = r#"
-+---+
-| a |
-+---+
-| 1 |
-| 2 |
-|   |
-| 4 |
-+---+
-"#
-    .trim();
-    assert_results_match_expected(plan, expected).await?;
+    let formatted = plan_matches_expected(&plan)?;
+    let actual = formatted.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    LocalLimitExec: fetch=4
+      AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], lim=[4]
+        DataSourceExec: partitions=1, partition_sizes=[1]
+    "
+    );
+    let expected = assert_results_match_expected(plan).await?;
+    let actual = expected.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    +---+
+    | a |
+    +---+
+    | 1 |
+    | 2 |
+    |   |
+    | 4 |
+    +---+
+    "
+    );
     Ok(())
 }
 
@@ -162,24 +173,31 @@ async fn test_single_global() -> Result<()> {
         Some(3), // fetch
     );
     // expected to push the skip+fetch limit to the AggregateExec
-    let expected = [
-        "GlobalLimitExec: skip=1, fetch=3",
-        "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], lim=[4]",
-        "DataSourceExec: partitions=1, partition_sizes=[1]",
-    ];
     let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
-    assert_plan_matches_expected(&plan, &expected)?;
-    let expected = r#"
-+---+
-| a |
-+---+
-| 2 |
-|   |
-| 4 |
-+---+
-"#
-    .trim();
-    assert_results_match_expected(plan, expected).await?;
+    let formatted = plan_matches_expected(&plan)?;
+    let actual = formatted.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    GlobalLimitExec: skip=1, fetch=3
+      AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], lim=[4]
+        DataSourceExec: partitions=1, partition_sizes=[1]
+    "
+    );
+    let expected = assert_results_match_expected(plan).await?;
+    let actual = expected.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    +---+
+    | a |
+    +---+
+    | 2 |
+    |   |
+    | 4 |
+    +---+
+    "
+    );
     Ok(())
 }
 
@@ -210,26 +228,33 @@ async fn test_distinct_cols_different_than_group_by_cols() -> Result<()> {
         4, // fetch
     );
     // expected to push the limit to the outer AggregateExec only
-    let expected = [
-        "LocalLimitExec: fetch=4",
-        "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], lim=[4]",
-        "AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b], aggr=[]",
-        "DataSourceExec: partitions=1, partition_sizes=[1]",
-    ];
     let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
-    assert_plan_matches_expected(&plan, &expected)?;
-    let expected = r#"
-+---+
-| a |
-+---+
-| 1 |
-| 2 |
-|   |
-| 4 |
-+---+
-"#
-    .trim();
-    assert_results_match_expected(plan, expected).await?;
+    let formatted = plan_matches_expected(&plan)?;
+    let actual = formatted.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    LocalLimitExec: fetch=4
+      AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], lim=[4]
+        AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b], aggr=[]
+          DataSourceExec: partitions=1, partition_sizes=[1]
+    "
+    );
+    let expected = assert_results_match_expected(plan).await?;
+    let actual = expected.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    +---+
+    | a |
+    +---+
+    | 1 |
+    | 2 |
+    |   |
+    | 4 |
+    +---+
+    "
+    );
     Ok(())
 }
 
@@ -258,13 +283,17 @@ fn test_has_order_by() -> Result<()> {
         10, // fetch
     );
     // expected not to push the limit to the AggregateExec
-    let expected = [
-        "LocalLimitExec: fetch=10",
-        "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], ordering_mode=Sorted",
-        "DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet",
-    ];
-    let plan = Arc::new(limit_exec) as _;
-    assert_plan_matches_expected(&plan, &expected)?;
+    let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
+    let formatted = plan_matches_expected(&plan)?;
+    let actual = formatted.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    LocalLimitExec: fetch=10
+      AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], ordering_mode=Sorted
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet
+    "
+    );
     Ok(())
 }
 
@@ -287,13 +316,17 @@ fn test_no_group_by() -> Result<()> {
         10, // fetch
     );
     // expected not to push the limit to the AggregateExec
-    let expected = [
-        "LocalLimitExec: fetch=10",
-        "AggregateExec: mode=Single, gby=[], aggr=[]",
-        "DataSourceExec: partitions=1, partition_sizes=[1]",
-    ];
     let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
-    assert_plan_matches_expected(&plan, &expected)?;
+    let formatted = plan_matches_expected(&plan)?;
+    let actual = formatted.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    LocalLimitExec: fetch=10
+      AggregateExec: mode=Single, gby=[], aggr=[]
+        DataSourceExec: partitions=1, partition_sizes=[1]
+    "
+    );
     Ok(())
 }
 
@@ -317,13 +350,17 @@ fn test_has_aggregate_expression() -> Result<()> {
         10, // fetch
     );
     // expected not to push the limit to the AggregateExec
-    let expected = [
-        "LocalLimitExec: fetch=10",
-        "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[COUNT(*)]",
-        "DataSourceExec: partitions=1, partition_sizes=[1]",
-    ];
     let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
-    assert_plan_matches_expected(&plan, &expected)?;
+    let formatted = plan_matches_expected(&plan)?;
+    let actual = formatted.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    LocalLimitExec: fetch=10
+      AggregateExec: mode=Single, gby=[a@0 as a], aggr=[COUNT(*)]
+        DataSourceExec: partitions=1, partition_sizes=[1]
+    "
+    );
     Ok(())
 }
 
@@ -355,12 +392,16 @@ fn test_has_filter() -> Result<()> {
     );
     // expected not to push the limit to the AggregateExec
     // TODO(msirek): open an issue for `filter_expr` of `AggregateExec` not printing out
-    let expected = [
-        "LocalLimitExec: fetch=10",
-        "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[COUNT(*)]",
-        "DataSourceExec: partitions=1, partition_sizes=[1]",
-    ];
     let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
-    assert_plan_matches_expected(&plan, &expected)?;
+    let formatted = plan_matches_expected(&plan)?;
+    let actual = formatted.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    LocalLimitExec: fetch=10
+      AggregateExec: mode=Single, gby=[a@0 as a], aggr=[COUNT(*)]
+        DataSourceExec: partitions=1, partition_sizes=[1]
+    "
+    );
     Ok(())
 }
diff --git a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs
index 1f8aad0f23345..585bb3fb360f0 100644
--- a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs
+++ b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs
@@ -59,6 +59,7 @@ use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec};
 use datafusion_physical_plan::union::UnionExec;
 use datafusion_physical_plan::{get_plan_string, ExecutionPlan};
 
+use insta::assert_snapshot;
 use itertools::Itertools;
 
 /// Mocked UDF
@@ -424,19 +425,27 @@ fn test_csv_after_projection() -> Result<()> {
         ],
         csv.clone(),
     )?);
-    let initial = get_plan_string(&projection);
-    let expected_initial = [
-        "ProjectionExec: expr=[b@2 as b, d@0 as d]",
-        "  DataSourceExec: file_groups={1 group: [[x]]}, projection=[d, c, b], file_type=csv, has_header=false",
-    ];
-    assert_eq!(initial, expected_initial);
+    let initial = get_plan_string(&projection).join("\n");
+    let actual = initial.trim();
+
+    assert_snapshot!(
+        actual,
+        @r"
+    ProjectionExec: expr=[b@2 as b, d@0 as d]
+      DataSourceExec: file_groups={1 group: [[x]]}, projection=[d, c, b], file_type=csv, has_header=false
+    "
+    );
 
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let expected =
-        ["DataSourceExec: file_groups={1 group: [[x]]}, projection=[b, d], file_type=csv, has_header=false"];
-    assert_eq!(get_plan_string(&after_optimize), expected);
+    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let actual = after_optimize_string.trim();
+
+    assert_snapshot!(
+        actual,
+        @"DataSourceExec: file_groups={1 group: [[x]]}, projection=[b, d], file_type=csv, has_header=false"
+    );
 
     Ok(())
 }
@@ -452,18 +461,28 @@ fn test_memory_after_projection() -> Result<()> {
         ],
         memory.clone(),
     )?);
-    let initial = get_plan_string(&projection);
-    let expected_initial = [
-        "ProjectionExec: expr=[d@2 as d, e@3 as e, a@1 as a]",
-        "  DataSourceExec: partitions=0, partition_sizes=[]",
-    ];
-    assert_eq!(initial, expected_initial);
+    let initial = get_plan_string(&projection).join("\n");
+    let actual = initial.trim();
+
+    assert_snapshot!(
+        actual,
+        @r"
+    ProjectionExec: expr=[d@2 as d, e@3 as e, a@1 as a]
+      DataSourceExec: partitions=0, partition_sizes=[]
+    "
+    );
 
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let expected = ["DataSourceExec: partitions=0, partition_sizes=[]"];
-    assert_eq!(get_plan_string(&after_optimize), expected);
+    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let actual = after_optimize_string.trim();
+
+    assert_snapshot!(
+        actual,
+        @"DataSourceExec: partitions=0, partition_sizes=[]"
+    );
+
     assert_eq!(
         after_optimize
             .clone()
@@ -630,22 +649,31 @@ fn test_projection_after_projection() -> Result<()> {
         child_projection.clone(),
     )?);
 
-    let initial = get_plan_string(&top_projection);
-    let expected_initial = [
-            "ProjectionExec: expr=[new_b@3 as new_b, c@0 + new_e@1 as binary, new_b@3 as newest_b]",
-            "  ProjectionExec: expr=[c@2 as c, e@4 as new_e, a@0 as a, b@1 as new_b]",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false"
-            ];
-    assert_eq!(initial, expected_initial);
+    let initial = get_plan_string(&top_projection).join("\n");
+    let actual = initial.trim();
+
+    assert_snapshot!(
+        actual,
+        @r"
+    ProjectionExec: expr=[new_b@3 as new_b, c@0 + new_e@1 as binary, new_b@3 as newest_b]
+      ProjectionExec: expr=[c@2 as c, e@4 as new_e, a@0 as a, b@1 as new_b]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     let after_optimize =
         ProjectionPushdown::new().optimize(top_projection, &ConfigOptions::new())?;
 
-    let expected = [
-            "ProjectionExec: expr=[b@1 as new_b, c@2 + e@4 as binary, b@1 as newest_b]",
-            "  DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false"
-        ];
-    assert_eq!(get_plan_string(&after_optimize), expected);
+    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let actual = after_optimize_string.trim();
+
+    assert_snapshot!(
+        actual,
+        @r"
+    ProjectionExec: expr=[b@1 as new_b, c@2 + e@4 as binary, b@1 as newest_b]
+      DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     Ok(())
 }
@@ -686,24 +714,33 @@ fn test_output_req_after_projection() -> Result<()> {
         sort_req.clone(),
     )?);
 
-    let initial = get_plan_string(&projection);
-    let expected_initial = [
-            "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]",
-            "  OutputRequirementExec",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false"
-            ];
-    assert_eq!(initial, expected_initial);
+    let initial = get_plan_string(&projection).join("\n");
+    let actual = initial.trim();
+
+    assert_snapshot!(
+        actual,
+        @r"
+    ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]
+      OutputRequirementExec
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let expected: [&str; 3] = [
-            "OutputRequirementExec",
-            "  ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false"
-        ];
+    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let actual = after_optimize_string.trim();
+
+    assert_snapshot!(
+        actual,
+        @r"
+    OutputRequirementExec
+      ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
-    assert_eq!(get_plan_string(&after_optimize), expected);
     let expected_reqs = OrderingRequirements::new(
         [
             PhysicalSortRequirement::new(
@@ -766,23 +803,32 @@ fn test_coalesce_partitions_after_projection() -> Result<()> {
         ],
         coalesce_partitions,
     )?);
-    let initial = get_plan_string(&projection);
-    let expected_initial = [
-                "ProjectionExec: expr=[b@1 as b, a@0 as a_new, d@3 as d]",
-                "  CoalescePartitionsExec",
-                "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
-        ];
-    assert_eq!(initial, expected_initial);
+    let initial = get_plan_string(&projection).join("\n");
+    let actual = initial.trim();
+
+    assert_snapshot!(
+        actual,
+        @r"
+    ProjectionExec: expr=[b@1 as b, a@0 as a_new, d@3 as d]
+      CoalescePartitionsExec
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let expected = [
-                "CoalescePartitionsExec",
-                "  ProjectionExec: expr=[b@1 as b, a@0 as a_new, d@3 as d]",
-                "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
-        ];
-    assert_eq!(get_plan_string(&after_optimize), expected);
+    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let actual = after_optimize_string.trim();
+
+    assert_snapshot!(
+        actual,
+        @r"
+    CoalescePartitionsExec
+      ProjectionExec: expr=[b@1 as b, a@0 as a_new, d@3 as d]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     Ok(())
 }
@@ -813,23 +859,32 @@ fn test_filter_after_projection() -> Result<()> {
         filter.clone(),
     )?) as _;
 
-    let initial = get_plan_string(&projection);
-    let expected_initial = [
-                "ProjectionExec: expr=[a@0 as a_new, b@1 as b, d@3 as d]",
-                "  FilterExec: b@1 - a@0 > d@3 - a@0",
-                "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
-        ];
-    assert_eq!(initial, expected_initial);
+    let initial = get_plan_string(&projection).join("\n");
+    let actual = initial.trim();
+
+    assert_snapshot!(
+        actual,
+        @r"
+    ProjectionExec: expr=[a@0 as a_new, b@1 as b, d@3 as d]
+      FilterExec: b@1 - a@0 > d@3 - a@0
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let expected = [
-                "FilterExec: b@1 - a_new@0 > d@2 - a_new@0",
-                "  ProjectionExec: expr=[a@0 as a_new, b@1 as b, d@3 as d]",
-                "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
-        ];
-    assert_eq!(get_plan_string(&after_optimize), expected);
+    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let actual = after_optimize_string.trim();
+
+    assert_snapshot!(
+        actual,
+        @r"
+    FilterExec: b@1 - a_new@0 > d@2 - a_new@0
+      ProjectionExec: expr=[a@0 as a_new, b@1 as b, d@3 as d]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     Ok(())
 }
@@ -898,26 +953,35 @@ fn test_join_after_projection() -> Result<()> {
         ],
         join,
     )?) as _;
-    let initial = get_plan_string(&projection);
-    let expected_initial = [
-            "ProjectionExec: expr=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left, a@5 as a_from_right, c@7 as c_from_right]",
-            "  SymmetricHashJoinExec: mode=SinglePartition, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false"
-            ];
-    assert_eq!(initial, expected_initial);
+    let initial = get_plan_string(&projection).join("\n");
+    let actual = initial.trim();
+
+    assert_snapshot!(
+        actual,
+        @r"
+    ProjectionExec: expr=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left, a@5 as a_from_right, c@7 as c_from_right]
+      SymmetricHashJoinExec: mode=SinglePartition, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let expected = [
-            "SymmetricHashJoinExec: mode=SinglePartition, join_type=Inner, on=[(b_from_left@1, c_from_right@1)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2",
-            "  ProjectionExec: expr=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left]",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
-            "  ProjectionExec: expr=[a@0 as a_from_right, c@2 as c_from_right]",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false"
-            ];
-    assert_eq!(get_plan_string(&after_optimize), expected);
+    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let actual = after_optimize_string.trim();
+
+    assert_snapshot!(
+        actual,
+        @r"
+    SymmetricHashJoinExec: mode=SinglePartition, join_type=Inner, on=[(b_from_left@1, c_from_right@1)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2
+      ProjectionExec: expr=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+      ProjectionExec: expr=[a@0 as a_from_right, c@2 as c_from_right]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     let expected_filter_col_ind = vec![
         ColumnIndex {
@@ -1017,25 +1081,34 @@ fn test_join_after_required_projection() -> Result<()> {
         ],
         join,
     )?) as _;
-    let initial = get_plan_string(&projection);
-    let expected_initial = [
-            "ProjectionExec: expr=[a@5 as a, b@6 as b, c@7 as c, d@8 as d, e@9 as e, a@0 as a, b@1 as b, c@2 as c, d@3 as d, e@4 as e]",
-            "  SymmetricHashJoinExec: mode=SinglePartition, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false"
-            ];
-    assert_eq!(initial, expected_initial);
+    let initial = get_plan_string(&projection).join("\n");
+    let actual = initial.trim();
+
+    assert_snapshot!(
+        actual,
+        @r"
+    ProjectionExec: expr=[a@5 as a, b@6 as b, c@7 as c, d@8 as d, e@9 as e, a@0 as a, b@1 as b, c@2 as c, d@3 as d, e@4 as e]
+      SymmetricHashJoinExec: mode=SinglePartition, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let expected = [
-            "ProjectionExec: expr=[a@5 as a, b@6 as b, c@7 as c, d@8 as d, e@9 as e, a@0 as a, b@1 as b, c@2 as c, d@3 as d, e@4 as e]",
-            "  SymmetricHashJoinExec: mode=SinglePartition, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false"
-            ];
-    assert_eq!(get_plan_string(&after_optimize), expected);
+    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let actual = after_optimize_string.trim();
+
+    assert_snapshot!(
+        actual,
+        @r"
+    ProjectionExec: expr=[a@5 as a, b@6 as b, c@7 as c, d@8 as d, e@9 as e, a@0 as a, b@1 as b, c@2 as c, d@3 as d, e@4 as e]
+      SymmetricHashJoinExec: mode=SinglePartition, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
     Ok(())
 }
 
@@ -1085,23 +1158,31 @@ fn test_nested_loop_join_after_projection() -> Result<()> {
         vec![(col_left_c, "c".to_string())],
         Arc::clone(&join),
     )?) as _;
-    let initial = get_plan_string(&projection);
-    let expected_initial = [
-            "ProjectionExec: expr=[c@2 as c]",
-            "  NestedLoopJoinExec: join_type=Inner, filter=a@0 < b@1",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
-            ];
-    assert_eq!(initial, expected_initial);
+    let initial = get_plan_string(&projection).join("\n");
+    let actual = initial.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    ProjectionExec: expr=[c@2 as c]
+      NestedLoopJoinExec: join_type=Inner, filter=a@0 < b@1
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
-    let after_optimize =
+    let after_optimize_string =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
-    let expected = [
-            "NestedLoopJoinExec: join_type=Inner, filter=a@0 < b@1, projection=[c@2]",
-            "  DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
-            "  DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
-        ];
-    assert_eq!(get_plan_string(&after_optimize), expected);
+    let after_optimize_string = get_plan_string(&after_optimize_string).join("\n");
+    let actual = after_optimize_string.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    NestedLoopJoinExec: join_type=Inner, filter=a@0 < b@1, projection=[c@2]
+      DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+      DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+
+    );
     Ok(())
 }
 
@@ -1169,18 +1250,33 @@ fn test_hash_join_after_projection() -> Result<()> {
         ],
         join.clone(),
     )?) as _;
-    let initial = get_plan_string(&projection);
-    let expected_initial = [
-			"ProjectionExec: expr=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left, c@7 as c_from_right]", "  HashJoinExec: mode=Auto, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2", "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false"
-            ];
-    assert_eq!(initial, expected_initial);
+    let initial = get_plan_string(&projection).join("\n");
+    let actual = initial.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    ProjectionExec: expr=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left, c@7 as c_from_right]
+      HashJoinExec: mode=Auto, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
+    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let actual = after_optimize_string.trim();
 
     // HashJoinExec only returns result after projection. Because there are some alias columns in the projection, the ProjectionExec is not removed.
-    let expected = ["ProjectionExec: expr=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left, c@3 as c_from_right]", "  HashJoinExec: mode=Auto, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2, projection=[a@0, b@1, c@2, c@7]", "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false"];
-    assert_eq!(get_plan_string(&after_optimize), expected);
+    assert_snapshot!(
+        actual,
+        @r"
+    ProjectionExec: expr=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left, c@3 as c_from_right]
+      HashJoinExec: mode=Auto, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2, projection=[a@0, b@1, c@2, c@7]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     let projection = Arc::new(ProjectionExec::try_new(
         vec![
@@ -1194,10 +1290,18 @@ fn test_hash_join_after_projection() -> Result<()> {
 
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
+    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let actual = after_optimize_string.trim();
 
     // Comparing to the previous result, this projection don't have alias columns either change the order of output fields. So the ProjectionExec is removed.
-    let expected = ["HashJoinExec: mode=Auto, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2, projection=[a@0, b@1, c@2, c@7]", "  DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", "  DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false"];
-    assert_eq!(get_plan_string(&after_optimize), expected);
+    assert_snapshot!(
+        actual,
+        @r"
+    HashJoinExec: mode=Auto, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2, projection=[a@0, b@1, c@2, c@7]
+      DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+      DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     Ok(())
 }
@@ -1224,23 +1328,30 @@ fn test_repartition_after_projection() -> Result<()> {
         ],
         repartition,
     )?) as _;
-    let initial = get_plan_string(&projection);
-    let expected_initial = [
-                "ProjectionExec: expr=[b@1 as b_new, a@0 as a, d@3 as d_new]",
-                "  RepartitionExec: partitioning=Hash([a@0, b@1, d@3], 6), input_partitions=1",
-                "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
-        ];
-    assert_eq!(initial, expected_initial);
+    let initial = get_plan_string(&projection).join("\n");
+    let actual = initial.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    ProjectionExec: expr=[b@1 as b_new, a@0 as a, d@3 as d_new]
+      RepartitionExec: partitioning=Hash([a@0, b@1, d@3], 6), input_partitions=1
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let expected = [
-                "RepartitionExec: partitioning=Hash([a@1, b_new@0, d_new@2], 6), input_partitions=1",
-                "  ProjectionExec: expr=[b@1 as b_new, a@0 as a, d@3 as d_new]",
-                "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
-        ];
-    assert_eq!(get_plan_string(&after_optimize), expected);
+    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let actual = after_optimize_string.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    RepartitionExec: partitioning=Hash([a@1, b_new@0, d_new@2], 6), input_partitions=1
+      ProjectionExec: expr=[b@1 as b_new, a@0 as a, d@3 as d_new]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     assert_eq!(
         after_optimize
@@ -1286,23 +1397,30 @@ fn test_sort_after_projection() -> Result<()> {
         Arc::new(sort_exec),
     )?) as _;
 
-    let initial = get_plan_string(&projection);
-    let expected_initial = [
-            "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]",
-            "  SortExec: expr=[b@1 ASC, c@2 + a@0 ASC], preserve_partitioning=[false]",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false"
-            ];
-    assert_eq!(initial, expected_initial);
+    let initial = get_plan_string(&projection).join("\n");
+    let actual = initial.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]
+      SortExec: expr=[b@1 ASC, c@2 + a@0 ASC], preserve_partitioning=[false]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let expected = [
-            "SortExec: expr=[b@2 ASC, c@0 + new_a@1 ASC], preserve_partitioning=[false]",
-            "  ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false"
-        ];
-    assert_eq!(get_plan_string(&after_optimize), expected);
+    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let actual = after_optimize_string.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    SortExec: expr=[b@2 ASC, c@0 + new_a@1 ASC], preserve_partitioning=[false]
+      ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     Ok(())
 }
@@ -1331,23 +1449,30 @@ fn test_sort_preserving_after_projection() -> Result<()> {
         Arc::new(sort_exec),
     )?) as _;
 
-    let initial = get_plan_string(&projection);
-    let expected_initial = [
-            "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]",
-            "  SortPreservingMergeExec: [b@1 ASC, c@2 + a@0 ASC]",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false"
-            ];
-    assert_eq!(initial, expected_initial);
+    let initial = get_plan_string(&projection).join("\n");
+    let actual = initial.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]
+      SortPreservingMergeExec: [b@1 ASC, c@2 + a@0 ASC]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let expected = [
-            "SortPreservingMergeExec: [b@2 ASC, c@0 + new_a@1 ASC]",
-            "  ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false"
-        ];
-    assert_eq!(get_plan_string(&after_optimize), expected);
+    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let actual = after_optimize_string.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    SortPreservingMergeExec: [b@2 ASC, c@0 + new_a@1 ASC]
+      ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     Ok(())
 }
@@ -1365,29 +1490,36 @@ fn test_union_after_projection() -> Result<()> {
         union.clone(),
     )?) as _;
 
-    let initial = get_plan_string(&projection);
-    let expected_initial = [
-            "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]",
-            "  UnionExec",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false"
-            ];
-    assert_eq!(initial, expected_initial);
+    let initial = get_plan_string(&projection).join("\n");
+    let actual = initial.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]
+      UnionExec
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let expected = [
-            "UnionExec",
-            "  ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
-            "  ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
-            "  ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]",
-            "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false"
-        ];
-    assert_eq!(get_plan_string(&after_optimize), expected);
+    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let actual = after_optimize_string.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    UnionExec
+      ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+      ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+      ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false
+    "
+    );
 
     Ok(())
 }
@@ -1439,11 +1571,15 @@ fn test_partition_col_projection_pushdown() -> Result<()> {
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let expected = [
-        "ProjectionExec: expr=[string_col@1 as string_col, partition_col@2 as partition_col, int_col@0 as int_col]",
-        "  DataSourceExec: file_groups={1 group: [[x]]}, projection=[int_col, string_col, partition_col], file_type=csv, has_header=false"
-    ];
-    assert_eq!(get_plan_string(&after_optimize), expected);
+    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let actual = after_optimize_string.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    ProjectionExec: expr=[string_col@1 as string_col, partition_col@2 as partition_col, int_col@0 as int_col]
+      DataSourceExec: file_groups={1 group: [[x]]}, projection=[int_col, string_col, partition_col], file_type=csv, has_header=false
+    "
+    );
 
     Ok(())
 }
@@ -1479,11 +1615,15 @@ fn test_partition_col_projection_pushdown_expr() -> Result<()> {
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let expected = [
-        "ProjectionExec: expr=[string_col@1 as string_col, CAST(partition_col@2 AS Utf8View) as partition_col, int_col@0 as int_col]",
-        "  DataSourceExec: file_groups={1 group: [[x]]}, projection=[int_col, string_col, partition_col], file_type=csv, has_header=false"
-    ];
-    assert_eq!(get_plan_string(&after_optimize), expected);
+    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let actual = after_optimize_string.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    ProjectionExec: expr=[string_col@1 as string_col, CAST(partition_col@2 AS Utf8View) as partition_col, int_col@0 as int_col]
+      DataSourceExec: file_groups={1 group: [[x]]}, projection=[int_col, string_col, partition_col], file_type=csv, has_header=false
+    "
+    );
 
     Ok(())
 }
diff --git a/datafusion/core/tests/physical_optimizer/sanity_checker.rs b/datafusion/core/tests/physical_optimizer/sanity_checker.rs
index f7d68e5d899c9..5d62ea4ccb20e 100644
--- a/datafusion/core/tests/physical_optimizer/sanity_checker.rs
+++ b/datafusion/core/tests/physical_optimizer/sanity_checker.rs
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use insta::assert_snapshot;
 use std::sync::Arc;
 
 use crate::physical_optimizer::test_utils::{
@@ -398,14 +399,6 @@ fn assert_sanity_check(plan: &Arc<dyn ExecutionPlan>, is_sane: bool) {
     );
 }
 
-/// Check if the plan we created is as expected by comparing the plan
-/// formatted as a string.
-fn assert_plan(plan: &dyn ExecutionPlan, expected_lines: Vec<&str>) {
-    let plan_str = displayable(plan).indent(true).to_string();
-    let actual_lines: Vec<&str> = plan_str.trim().lines().collect();
-    assert_eq!(actual_lines, expected_lines);
-}
-
 #[tokio::test]
 /// Tests that plan is valid when the sort requirements are satisfied.
 async fn test_bounded_window_agg_sort_requirement() -> Result<()> {
@@ -422,11 +415,16 @@ async fn test_bounded_window_agg_sort_requirement() -> Result<()> {
     .into();
     let sort = sort_exec(ordering.clone(), source);
     let bw = bounded_window_exec("c9", ordering, sort);
-    assert_plan(bw.as_ref(), vec![
-        "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]",
-        "  SortExec: expr=[c9@0 ASC NULLS LAST], preserve_partitioning=[false]",
-        "    DataSourceExec: partitions=1, partition_sizes=[0]"
-    ]);
+    let plan_str = displayable(bw.as_ref()).indent(true).to_string();
+    let actual = plan_str.trim();
+    assert_snapshot!(
+        actual,
+        @r#"
+    BoundedWindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]
+      SortExec: expr=[c9@0 ASC NULLS LAST], preserve_partitioning=[false]
+        DataSourceExec: partitions=1, partition_sizes=[0]
+    "#
+    );
     assert_sanity_check(&bw, true);
     Ok(())
 }
@@ -445,10 +443,15 @@ async fn test_bounded_window_agg_no_sort_requirement() -> Result<()> {
         },
     )];
     let bw = bounded_window_exec("c9", sort_exprs, source);
-    assert_plan(bw.as_ref(), vec![
-        "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]",
-        "  DataSourceExec: partitions=1, partition_sizes=[0]"
-    ]);
+    let plan_str = displayable(bw.as_ref()).indent(true).to_string();
+    let actual = plan_str.trim();
+    assert_snapshot!(
+        actual,
+        @r#"
+    BoundedWindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]
+      DataSourceExec: partitions=1, partition_sizes=[0]
+    "#
+    );
     // Order requirement of the `BoundedWindowAggExec` is not satisfied. We expect to receive error during sanity check.
     assert_sanity_check(&bw, false);
     Ok(())
@@ -462,12 +465,14 @@ async fn test_global_limit_single_partition() -> Result<()> {
     let source = memory_exec(&schema);
     let limit = global_limit_exec(source, 0, Some(100));
 
-    assert_plan(
-        limit.as_ref(),
-        vec![
-            "GlobalLimitExec: skip=0, fetch=100",
-            "  DataSourceExec: partitions=1, partition_sizes=[0]",
-        ],
+    let plan_str = displayable(limit.as_ref()).indent(true).to_string();
+    let actual = plan_str.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    GlobalLimitExec: skip=0, fetch=100
+      DataSourceExec: partitions=1, partition_sizes=[0]
+    "
     );
     assert_sanity_check(&limit, true);
     Ok(())
@@ -481,13 +486,15 @@ async fn test_global_limit_multi_partition() -> Result<()> {
     let source = memory_exec(&schema);
     let limit = global_limit_exec(repartition_exec(source), 0, Some(100));
 
-    assert_plan(
-        limit.as_ref(),
-        vec![
-            "GlobalLimitExec: skip=0, fetch=100",
-            "  RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
-            "    DataSourceExec: partitions=1, partition_sizes=[0]",
-        ],
+    let plan_str = displayable(limit.as_ref()).indent(true).to_string();
+    let actual = plan_str.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    GlobalLimitExec: skip=0, fetch=100
+      RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
+        DataSourceExec: partitions=1, partition_sizes=[0]
+    "
     );
     // Distribution requirement of the `GlobalLimitExec` is not satisfied. We expect to receive error during sanity check.
     assert_sanity_check(&limit, false);
@@ -501,12 +508,14 @@ async fn test_local_limit() -> Result<()> {
     let source = memory_exec(&schema);
     let limit = local_limit_exec(source, 100);
 
-    assert_plan(
-        limit.as_ref(),
-        vec![
-            "LocalLimitExec: fetch=100",
-            "  DataSourceExec: partitions=1, partition_sizes=[0]",
-        ],
+    let plan_str = displayable(limit.as_ref()).indent(true).to_string();
+    let actual = plan_str.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    LocalLimitExec: fetch=100
+      DataSourceExec: partitions=1, partition_sizes=[0]
+    "
     );
     assert_sanity_check(&limit, true);
     Ok(())
@@ -540,17 +549,19 @@ async fn test_sort_merge_join_satisfied() -> Result<()> {
     let join_ty = JoinType::Inner;
     let smj = sort_merge_join_exec(left, right, &join_on, &join_ty);
 
-    assert_plan(
-        smj.as_ref(),
-        vec![
-            "SortMergeJoin: join_type=Inner, on=[(c9@0, a@0)]",
-            "  RepartitionExec: partitioning=Hash([c9@0], 10), input_partitions=1",
-            "    SortExec: expr=[c9@0 ASC], preserve_partitioning=[false]",
-            "      DataSourceExec: partitions=1, partition_sizes=[0]",
-            "  RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1",
-            "    SortExec: expr=[a@0 ASC], preserve_partitioning=[false]",
-            "      DataSourceExec: partitions=1, partition_sizes=[0]",
-        ],
+    let plan_str = displayable(smj.as_ref()).indent(true).to_string();
+    let actual = plan_str.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    SortMergeJoin: join_type=Inner, on=[(c9@0, a@0)]
+      RepartitionExec: partitioning=Hash([c9@0], 10), input_partitions=1
+        SortExec: expr=[c9@0 ASC], preserve_partitioning=[false]
+          DataSourceExec: partitions=1, partition_sizes=[0]
+      RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1
+        SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
+          DataSourceExec: partitions=1, partition_sizes=[0]
+    "
     );
     assert_sanity_check(&smj, true);
     Ok(())
@@ -588,16 +599,18 @@ async fn test_sort_merge_join_order_missing() -> Result<()> {
     let join_ty = JoinType::Inner;
     let smj = sort_merge_join_exec(left, right, &join_on, &join_ty);
 
-    assert_plan(
-        smj.as_ref(),
-        vec![
-            "SortMergeJoin: join_type=Inner, on=[(c9@0, a@0)]",
-            "  RepartitionExec: partitioning=Hash([c9@0], 10), input_partitions=1",
-            "    SortExec: expr=[c9@0 ASC], preserve_partitioning=[false]",
-            "      DataSourceExec: partitions=1, partition_sizes=[0]",
-            "  RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1",
-            "    DataSourceExec: partitions=1, partition_sizes=[0]",
-        ],
+    let plan_str = displayable(smj.as_ref()).indent(true).to_string();
+    let actual = plan_str.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    SortMergeJoin: join_type=Inner, on=[(c9@0, a@0)]
+      RepartitionExec: partitioning=Hash([c9@0], 10), input_partitions=1
+        SortExec: expr=[c9@0 ASC], preserve_partitioning=[false]
+          DataSourceExec: partitions=1, partition_sizes=[0]
+      RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1
+        DataSourceExec: partitions=1, partition_sizes=[0]
+    "
     );
     // Order requirement for the `SortMergeJoin` is not satisfied for right child. We expect to receive error during sanity check.
     assert_sanity_check(&smj, false);
@@ -634,17 +647,19 @@ async fn test_sort_merge_join_dist_missing() -> Result<()> {
     let join_ty = JoinType::Inner;
     let smj = sort_merge_join_exec(left, right, &join_on, &join_ty);
 
-    assert_plan(
-        smj.as_ref(),
-        vec![
-            "SortMergeJoin: join_type=Inner, on=[(c9@0, a@0)]",
-            "  RepartitionExec: partitioning=Hash([c9@0], 10), input_partitions=1",
-            "    SortExec: expr=[c9@0 ASC], preserve_partitioning=[false]",
-            "      DataSourceExec: partitions=1, partition_sizes=[0]",
-            "  RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
-            "    SortExec: expr=[a@0 ASC], preserve_partitioning=[false]",
-            "      DataSourceExec: partitions=1, partition_sizes=[0]",
-        ],
+    let plan_str = displayable(smj.as_ref()).indent(true).to_string();
+    let actual = plan_str.trim();
+    assert_snapshot!(
+        actual,
+        @r"
+    SortMergeJoin: join_type=Inner, on=[(c9@0, a@0)]
+      RepartitionExec: partitioning=Hash([c9@0], 10), input_partitions=1
+        SortExec: expr=[c9@0 ASC], preserve_partitioning=[false]
+          DataSourceExec: partitions=1, partition_sizes=[0]
+      RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
+        SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
+          DataSourceExec: partitions=1, partition_sizes=[0]
+    "
     );
     // Distribution requirement for the `SortMergeJoin` is not satisfied for right child (has round-robin partitioning). We expect to receive error during sanity check.
     assert_sanity_check(&smj, false);
diff --git a/datafusion/core/tests/physical_optimizer/test_utils.rs b/datafusion/core/tests/physical_optimizer/test_utils.rs
index c91a70989be49..d1b51adf73e28 100644
--- a/datafusion/core/tests/physical_optimizer/test_utils.rs
+++ b/datafusion/core/tests/physical_optimizer/test_utils.rs
@@ -509,13 +509,6 @@ pub fn check_integrity<T: Clone>(context: PlanContext<T>) -> Result<PlanContext<
         .data()
 }
 
-pub fn trim_plan_display(plan: &str) -> Vec<&str> {
-    plan.split('\n')
-        .map(|s| s.trim())
-        .filter(|s| !s.is_empty())
-        .collect()
-}
-
 // construct a stream partition for test purposes
 #[derive(Debug)]
 pub struct TestStreamPartition {
@@ -629,25 +622,15 @@ pub fn build_group_by(input_schema: &SchemaRef, columns: Vec<String>) -> Physica
     PhysicalGroupBy::new_single(group_by_expr.clone())
 }
 
-pub fn assert_plan_matches_expected(
-    plan: &Arc<dyn ExecutionPlan>,
-    expected: &[&str],
-) -> Result<()> {
-    let expected_lines: Vec<&str> = expected.to_vec();
+pub fn plan_matches_expected(plan: &Arc<dyn ExecutionPlan>) -> Result<String> {
     let config = ConfigOptions::new();
 
     let optimized =
         LimitedDistinctAggregation::new().optimize(Arc::clone(plan), &config)?;
 
     let optimized_result = displayable(optimized.as_ref()).indent(true).to_string();
-    let actual_lines = trim_plan_display(&optimized_result);
-
-    assert_eq!(
-        &expected_lines, &actual_lines,
-        "\n\nexpected:\n\n{expected_lines:#?}\nactual:\n\n{actual_lines:#?}\n\n"
-    );
 
-    Ok(())
+    Ok(optimized_result)
 }
 
 /// Describe the type of aggregate being tested
diff --git a/datafusion/core/tests/sql/joins.rs b/datafusion/core/tests/sql/joins.rs
index 729542d27e3fd..7ac63210a9671 100644
--- a/datafusion/core/tests/sql/joins.rs
+++ b/datafusion/core/tests/sql/joins.rs
@@ -13,7 +13,9 @@
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
-// under the License.
+// under the License.\
+
+use insta::assert_snapshot;
 
 use datafusion::assert_batches_eq;
 use datafusion::datasource::stream::{FileStreamProvider, StreamConfig, StreamTable};
@@ -62,28 +64,21 @@ async fn join_change_in_planner() -> Result<()> {
     let dataframe = ctx.sql(sql).await?;
     let physical_plan = dataframe.create_physical_plan().await?;
     let formatted = displayable(physical_plan.as_ref()).indent(true).to_string();
-    let expected = {
-        [
-            "SymmetricHashJoinExec: mode=Partitioned, join_type=Full, on=[(a2@1, a2@1)], filter=CAST(a1@0 AS Int64) > CAST(a1@1 AS Int64) + 3 AND CAST(a1@0 AS Int64) < CAST(a1@1 AS Int64) + 10",
-            "  CoalesceBatchesExec: target_batch_size=8192",
-            "    RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a1@0 ASC NULLS LAST",
-            "      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            // "     DataSourceExec: file_groups={1 group: [[tempdir/left.csv]]}, projection=[a1, a2], file_type=csv, has_header=false",
-            "  CoalesceBatchesExec: target_batch_size=8192",
-            "    RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a1@0 ASC NULLS LAST",
-            "      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            // "     DataSourceExec: file_groups={1 group: [[tempdir/right.csv]]}, projection=[a1, a2], file_type=csv, has_header=false"
-        ]
-    };
-    let mut actual: Vec<&str> = formatted.trim().lines().collect();
-    // Remove CSV lines
-    actual.remove(4);
-    actual.remove(7);
+    let actual = formatted.trim();
 
-    assert_eq!(
-        expected,
-        actual[..],
-        "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
+    assert_snapshot!(
+        actual,
+        @r"
+    SymmetricHashJoinExec: mode=Partitioned, join_type=Full, on=[(a2@1, a2@1)], filter=CAST(a1@0 AS Int64) > CAST(a1@1 AS Int64) + 3 AND CAST(a1@0 AS Int64) < CAST(a1@1 AS Int64) + 10
+      CoalesceBatchesExec: target_batch_size=8192
+        RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a1@0 ASC NULLS LAST
+          RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1
+            StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST]
+      CoalesceBatchesExec: target_batch_size=8192
+        RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a1@0 ASC NULLS LAST
+          RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1
+            StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST]
+    "
     );
     Ok(())
 }
@@ -130,28 +125,21 @@ async fn join_no_order_on_filter() -> Result<()> {
     let dataframe = ctx.sql(sql).await?;
     let physical_plan = dataframe.create_physical_plan().await?;
     let formatted = displayable(physical_plan.as_ref()).indent(true).to_string();
-    let expected = {
-        [
-            "SymmetricHashJoinExec: mode=Partitioned, join_type=Full, on=[(a2@1, a2@1)], filter=CAST(a3@0 AS Int64) > CAST(a3@1 AS Int64) + 3 AND CAST(a3@0 AS Int64) < CAST(a3@1 AS Int64) + 10",
-            "  CoalesceBatchesExec: target_batch_size=8192",
-            "    RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8",
-            "      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            // "     DataSourceExec: file_groups={1 group: [[tempdir/left.csv]]}, projection=[a1, a2], file_type=csv, has_header=false",
-            "  CoalesceBatchesExec: target_batch_size=8192",
-            "    RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8",
-            "      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            // "     DataSourceExec: file_groups={1 group: [[tempdir/right.csv]]}, projection=[a1, a2], file_type=csv, has_header=false"
-        ]
-    };
-    let mut actual: Vec<&str> = formatted.trim().lines().collect();
-    // Remove CSV lines
-    actual.remove(4);
-    actual.remove(7);
+    let actual = formatted.trim();
 
-    assert_eq!(
-        expected,
-        actual[..],
-        "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
+    assert_snapshot!(
+        actual,
+        @r"
+    SymmetricHashJoinExec: mode=Partitioned, join_type=Full, on=[(a2@1, a2@1)], filter=CAST(a3@0 AS Int64) > CAST(a3@1 AS Int64) + 3 AND CAST(a3@0 AS Int64) < CAST(a3@1 AS Int64) + 10
+      CoalesceBatchesExec: target_batch_size=8192
+        RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8
+          RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1
+            StreamingTableExec: partition_sizes=1, projection=[a1, a2, a3], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST]
+      CoalesceBatchesExec: target_batch_size=8192
+        RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8
+          RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1
+            StreamingTableExec: partition_sizes=1, projection=[a1, a2, a3], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST]
+    "
     );
     Ok(())
 }
@@ -180,28 +168,21 @@ async fn join_change_in_planner_without_sort() -> Result<()> {
     let dataframe = ctx.sql(sql).await?;
     let physical_plan = dataframe.create_physical_plan().await?;
     let formatted = displayable(physical_plan.as_ref()).indent(true).to_string();
-    let expected = {
-        [
-            "SymmetricHashJoinExec: mode=Partitioned, join_type=Full, on=[(a2@1, a2@1)], filter=CAST(a1@0 AS Int64) > CAST(a1@1 AS Int64) + 3 AND CAST(a1@0 AS Int64) < CAST(a1@1 AS Int64) + 10",
-            "  CoalesceBatchesExec: target_batch_size=8192",
-            "    RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8",
-            "      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            // "     DataSourceExec: file_groups={1 group: [[tempdir/left.csv]]}, projection=[a1, a2], file_type=csv, has_header=false",
-            "  CoalesceBatchesExec: target_batch_size=8192",
-            "    RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8",
-            "      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            // "     DataSourceExec: file_groups={1 group: [[tempdir/right.csv]]}, projection=[a1, a2], file_type=csv, has_header=false"
-        ]
-    };
-    let mut actual: Vec<&str> = formatted.trim().lines().collect();
-    // Remove CSV lines
-    actual.remove(4);
-    actual.remove(7);
+    let actual = formatted.trim();
 
-    assert_eq!(
-        expected,
-        actual[..],
-        "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
+    assert_snapshot!(
+        actual,
+        @r"
+    SymmetricHashJoinExec: mode=Partitioned, join_type=Full, on=[(a2@1, a2@1)], filter=CAST(a1@0 AS Int64) > CAST(a1@1 AS Int64) + 3 AND CAST(a1@0 AS Int64) < CAST(a1@1 AS Int64) + 10
+      CoalesceBatchesExec: target_batch_size=8192
+        RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8
+          RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1
+            StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true
+      CoalesceBatchesExec: target_batch_size=8192
+        RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8
+          RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1
+            StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true
+    "
     );
     Ok(())
 }

From 61acb341f172d9a56284b7c602e0ceebbf5d0eb6 Mon Sep 17 00:00:00 2001
From: Ian Lai <Ian.Lai@senao.com>
Date: Mon, 30 Jun 2025 07:29:05 +0000
Subject: [PATCH 06/11] fix format

---
 .../core/tests/physical_optimizer/enforce_sorting.rs       | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
index a6e7cded15b03..06395eb41e17c 100644
--- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
+++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
@@ -86,10 +86,10 @@ fn csv_exec_sorted(
 /// Runs the sort enforcement optimizer and asserts the plan
 /// against the original and expected plans
 ///
+/// `$EXPECTED_PLAN_LINES`: input plan
+/// `$EXPECTED_OPTIMIZED_PLAN_LINES`: optimized plan
 /// `$PLAN`: the plan to optimized
-/// `$REPARTITION_SORTS`: Flag to set `config.options.optimizer.repartition_sorts` option.
-/// `$EXPECTED_PLAN_COMBINED`: The expected combined plan string with the original and optimized plans.
-///
+/// `REPARTITION_SORTS`: Flag to set `config.options.optimizer.repartition_sorts` option.
 macro_rules! assert_optimized {
     ($EXPECTED_PLAN_LINES: expr, $EXPECTED_OPTIMIZED_PLAN_LINES: expr, $PLAN: expr, $REPARTITION_SORTS: expr) => {
         let mut config = ConfigOptions::new();
@@ -140,7 +140,6 @@ macro_rules! assert_optimized {
             // TODO: End state payloads will be checked here.
         }
 
-
         let physical_plan = $PLAN;
         let formatted = displayable(physical_plan.as_ref()).indent(true).to_string();
         let actual: Vec<&str> = formatted.trim().lines().collect();

From 773ccc10ba95bb47f67e73064448b9c6f0ef4a85 Mon Sep 17 00:00:00 2001
From: Ian Lai <Ian.Lai@senao.com>
Date: Mon, 30 Jun 2025 07:30:11 +0000
Subject: [PATCH 07/11] fix format

---
 datafusion/core/tests/physical_optimizer/enforce_sorting.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
index 06395eb41e17c..38bc10a967e2b 100644
--- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
+++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
@@ -90,6 +90,7 @@ fn csv_exec_sorted(
 /// `$EXPECTED_OPTIMIZED_PLAN_LINES`: optimized plan
 /// `$PLAN`: the plan to optimized
 /// `REPARTITION_SORTS`: Flag to set `config.options.optimizer.repartition_sorts` option.
+///
 macro_rules! assert_optimized {
     ($EXPECTED_PLAN_LINES: expr, $EXPECTED_OPTIMIZED_PLAN_LINES: expr, $PLAN: expr, $REPARTITION_SORTS: expr) => {
         let mut config = ConfigOptions::new();

From 792e5d6100cad4b50cdad47aff6427a5373e8ac0 Mon Sep 17 00:00:00 2001
From: Ian Lai <Ian.Lai@senao.com>
Date: Mon, 30 Jun 2025 07:44:04 +0000
Subject: [PATCH 08/11] fix typo

---
 datafusion/core/tests/sql/joins.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datafusion/core/tests/sql/joins.rs b/datafusion/core/tests/sql/joins.rs
index 7ac63210a9671..fbe7e3a00f542 100644
--- a/datafusion/core/tests/sql/joins.rs
+++ b/datafusion/core/tests/sql/joins.rs
@@ -13,7 +13,7 @@
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
-// under the License.\
+// under the License.
 
 use insta::assert_snapshot;
 

From f5652e813ce5c717d2c95a552d363957e4cc74f5 Mon Sep 17 00:00:00 2001
From: Ian Lai <Ian.Lai@senao.com>
Date: Tue, 1 Jul 2025 05:35:42 +0000
Subject: [PATCH 09/11] refactor: rename function

---
 .../combine_partial_final_agg.rs              |  2 --
 .../limited_distinct_aggregation.rs           | 28 +++++++++----------
 .../tests/physical_optimizer/test_utils.rs    |  2 +-
 3 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs
index 7e31600cdc4d5..9c76f6ab6f58b 100644
--- a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs
+++ b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs
@@ -45,8 +45,6 @@ use datafusion_physical_plan::ExecutionPlan;
 /// Runs the CombinePartialFinalAggregate optimizer and asserts the plan against the expected
 macro_rules! assert_optimized {
     ($PLAN: expr, @ $EXPECTED_LINES: literal $(,)?) => {
-        // let expected_lines: Vec<&str> = $EXPECTED_LINES.iter().map(|s| *s).collect();
-
         // run optimizer
         let optimizer = CombinePartialFinalAggregate {};
         let config = ConfigOptions::new();
diff --git a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs
index 8e4f25cb11dff..22b5fefeedcde 100644
--- a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs
+++ b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs
@@ -21,7 +21,7 @@ use insta::assert_snapshot;
 use std::sync::Arc;
 
 use crate::physical_optimizer::test_utils::{
-    build_group_by, mock_data, parquet_exec_with_sort, plan_matches_expected, schema,
+    build_group_by, get_optimized_plan, mock_data, parquet_exec_with_sort, schema,
     TestAggregate,
 };
 
@@ -40,7 +40,7 @@ use datafusion_physical_plan::{
     ExecutionPlan,
 };
 
-async fn assert_results_match_expected(plan: Arc<dyn ExecutionPlan>) -> Result<String> {
+async fn run_plan_and_format(plan: Arc<dyn ExecutionPlan>) -> Result<String> {
     let cfg = SessionConfig::new().with_target_partitions(1);
     let ctx = SessionContext::new_with_config(cfg);
     let batches = collect(plan, ctx.task_ctx()).await?;
@@ -75,7 +75,7 @@ async fn test_partial_final() -> Result<()> {
         4, // fetch
     );
     let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
-    let formatted = plan_matches_expected(&plan)?;
+    let formatted = get_optimized_plan(&plan)?;
     let actual = formatted.trim();
     assert_snapshot!(
         actual,
@@ -86,7 +86,7 @@ async fn test_partial_final() -> Result<()> {
           DataSourceExec: partitions=1, partition_sizes=[1]
     "
     );
-    let expected = assert_results_match_expected(plan).await?;
+    let expected = run_plan_and_format(plan).await?;
     let actual = expected.trim();
     assert_snapshot!(
         actual,
@@ -125,7 +125,7 @@ async fn test_single_local() -> Result<()> {
     );
     // expected to push the limit to the AggregateExec
     let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
-    let formatted = plan_matches_expected(&plan)?;
+    let formatted = get_optimized_plan(&plan)?;
     let actual = formatted.trim();
     assert_snapshot!(
         actual,
@@ -135,7 +135,7 @@ async fn test_single_local() -> Result<()> {
         DataSourceExec: partitions=1, partition_sizes=[1]
     "
     );
-    let expected = assert_results_match_expected(plan).await?;
+    let expected = run_plan_and_format(plan).await?;
     let actual = expected.trim();
     assert_snapshot!(
         actual,
@@ -174,7 +174,7 @@ async fn test_single_global() -> Result<()> {
     );
     // expected to push the skip+fetch limit to the AggregateExec
     let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
-    let formatted = plan_matches_expected(&plan)?;
+    let formatted = get_optimized_plan(&plan)?;
     let actual = formatted.trim();
     assert_snapshot!(
         actual,
@@ -184,7 +184,7 @@ async fn test_single_global() -> Result<()> {
         DataSourceExec: partitions=1, partition_sizes=[1]
     "
     );
-    let expected = assert_results_match_expected(plan).await?;
+    let expected = run_plan_and_format(plan).await?;
     let actual = expected.trim();
     assert_snapshot!(
         actual,
@@ -229,7 +229,7 @@ async fn test_distinct_cols_different_than_group_by_cols() -> Result<()> {
     );
     // expected to push the limit to the outer AggregateExec only
     let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
-    let formatted = plan_matches_expected(&plan)?;
+    let formatted = get_optimized_plan(&plan)?;
     let actual = formatted.trim();
     assert_snapshot!(
         actual,
@@ -240,7 +240,7 @@ async fn test_distinct_cols_different_than_group_by_cols() -> Result<()> {
           DataSourceExec: partitions=1, partition_sizes=[1]
     "
     );
-    let expected = assert_results_match_expected(plan).await?;
+    let expected = run_plan_and_format(plan).await?;
     let actual = expected.trim();
     assert_snapshot!(
         actual,
@@ -284,7 +284,7 @@ fn test_has_order_by() -> Result<()> {
     );
     // expected not to push the limit to the AggregateExec
     let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
-    let formatted = plan_matches_expected(&plan)?;
+    let formatted = get_optimized_plan(&plan)?;
     let actual = formatted.trim();
     assert_snapshot!(
         actual,
@@ -317,7 +317,7 @@ fn test_no_group_by() -> Result<()> {
     );
     // expected not to push the limit to the AggregateExec
     let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
-    let formatted = plan_matches_expected(&plan)?;
+    let formatted = get_optimized_plan(&plan)?;
     let actual = formatted.trim();
     assert_snapshot!(
         actual,
@@ -351,7 +351,7 @@ fn test_has_aggregate_expression() -> Result<()> {
     );
     // expected not to push the limit to the AggregateExec
     let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
-    let formatted = plan_matches_expected(&plan)?;
+    let formatted = get_optimized_plan(&plan)?;
     let actual = formatted.trim();
     assert_snapshot!(
         actual,
@@ -393,7 +393,7 @@ fn test_has_filter() -> Result<()> {
     // expected not to push the limit to the AggregateExec
     // TODO(msirek): open an issue for `filter_expr` of `AggregateExec` not printing out
     let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
-    let formatted = plan_matches_expected(&plan)?;
+    let formatted = get_optimized_plan(&plan)?;
     let actual = formatted.trim();
     assert_snapshot!(
         actual,
diff --git a/datafusion/core/tests/physical_optimizer/test_utils.rs b/datafusion/core/tests/physical_optimizer/test_utils.rs
index d1b51adf73e28..7fb0f795f2944 100644
--- a/datafusion/core/tests/physical_optimizer/test_utils.rs
+++ b/datafusion/core/tests/physical_optimizer/test_utils.rs
@@ -622,7 +622,7 @@ pub fn build_group_by(input_schema: &SchemaRef, columns: Vec<String>) -> Physica
     PhysicalGroupBy::new_single(group_by_expr.clone())
 }
 
-pub fn plan_matches_expected(plan: &Arc<dyn ExecutionPlan>) -> Result<String> {
+pub fn get_optimized_plan(plan: &Arc<dyn ExecutionPlan>) -> Result<String> {
     let config = ConfigOptions::new();
 
     let optimized =

From 40497e8cbca13ce2ac1e2b9f9df20b4e3b968bad Mon Sep 17 00:00:00 2001
From: Cheng-Yuan-Lai <a186235@g,ail.com>
Date: Wed, 2 Jul 2025 22:37:02 +0800
Subject: [PATCH 10/11] fix: remove trimming

---
 .../limited_distinct_aggregation.rs                  | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs
index 22b5fefeedcde..ad15d6803413b 100644
--- a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs
+++ b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs
@@ -87,9 +87,8 @@ async fn test_partial_final() -> Result<()> {
     "
     );
     let expected = run_plan_and_format(plan).await?;
-    let actual = expected.trim();
     assert_snapshot!(
-        actual,
+        expected,
         @r"
     +---+
     | a |
@@ -136,9 +135,8 @@ async fn test_single_local() -> Result<()> {
     "
     );
     let expected = run_plan_and_format(plan).await?;
-    let actual = expected.trim();
     assert_snapshot!(
-        actual,
+        expected,
         @r"
     +---+
     | a |
@@ -185,9 +183,8 @@ async fn test_single_global() -> Result<()> {
     "
     );
     let expected = run_plan_and_format(plan).await?;
-    let actual = expected.trim();
     assert_snapshot!(
-        actual,
+        expected,
         @r"
     +---+
     | a |
@@ -241,9 +238,8 @@ async fn test_distinct_cols_different_than_group_by_cols() -> Result<()> {
     "
     );
     let expected = run_plan_and_format(plan).await?;
-    let actual = expected.trim();
     assert_snapshot!(
-        actual,
+        expected,
         @r"
     +---+
     | a |

From f10d7458474e19872fb445f954a1ae8311121ce3 Mon Sep 17 00:00:00 2001
From: Cheng-Yuan-Lai <a186235@g,ail.com>
Date: Wed, 2 Jul 2025 23:22:44 +0800
Subject: [PATCH 11/11] refactor: replace get_plan_string with displayable in
 projection_pushdown

---
 .../physical_optimizer/projection_pushdown.rs | 122 ++++++++++++------
 1 file changed, 79 insertions(+), 43 deletions(-)

diff --git a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs
index 585bb3fb360f0..6964965a6431a 100644
--- a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs
+++ b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs
@@ -57,7 +57,7 @@ use datafusion_physical_plan::sorts::sort::SortExec;
 use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec;
 use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec};
 use datafusion_physical_plan::union::UnionExec;
-use datafusion_physical_plan::{get_plan_string, ExecutionPlan};
+use datafusion_physical_plan::{displayable, ExecutionPlan};
 
 use insta::assert_snapshot;
 use itertools::Itertools;
@@ -425,7 +425,7 @@ fn test_csv_after_projection() -> Result<()> {
         ],
         csv.clone(),
     )?);
-    let initial = get_plan_string(&projection).join("\n");
+    let initial = displayable(projection.as_ref()).indent(true).to_string();
     let actual = initial.trim();
 
     assert_snapshot!(
@@ -439,7 +439,9 @@ fn test_csv_after_projection() -> Result<()> {
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let after_optimize_string = displayable(after_optimize.as_ref())
+        .indent(true)
+        .to_string();
     let actual = after_optimize_string.trim();
 
     assert_snapshot!(
@@ -461,7 +463,7 @@ fn test_memory_after_projection() -> Result<()> {
         ],
         memory.clone(),
     )?);
-    let initial = get_plan_string(&projection).join("\n");
+    let initial = displayable(projection.as_ref()).indent(true).to_string();
     let actual = initial.trim();
 
     assert_snapshot!(
@@ -475,7 +477,9 @@ fn test_memory_after_projection() -> Result<()> {
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let after_optimize_string = displayable(after_optimize.as_ref())
+        .indent(true)
+        .to_string();
     let actual = after_optimize_string.trim();
 
     assert_snapshot!(
@@ -649,7 +653,9 @@ fn test_projection_after_projection() -> Result<()> {
         child_projection.clone(),
     )?);
 
-    let initial = get_plan_string(&top_projection).join("\n");
+    let initial = displayable(top_projection.as_ref())
+        .indent(true)
+        .to_string();
     let actual = initial.trim();
 
     assert_snapshot!(
@@ -664,7 +670,9 @@ fn test_projection_after_projection() -> Result<()> {
     let after_optimize =
         ProjectionPushdown::new().optimize(top_projection, &ConfigOptions::new())?;
 
-    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let after_optimize_string = displayable(after_optimize.as_ref())
+        .indent(true)
+        .to_string();
     let actual = after_optimize_string.trim();
 
     assert_snapshot!(
@@ -714,7 +722,7 @@ fn test_output_req_after_projection() -> Result<()> {
         sort_req.clone(),
     )?);
 
-    let initial = get_plan_string(&projection).join("\n");
+    let initial = displayable(projection.as_ref()).indent(true).to_string();
     let actual = initial.trim();
 
     assert_snapshot!(
@@ -729,7 +737,9 @@ fn test_output_req_after_projection() -> Result<()> {
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let after_optimize_string = displayable(after_optimize.as_ref())
+        .indent(true)
+        .to_string();
     let actual = after_optimize_string.trim();
 
     assert_snapshot!(
@@ -803,7 +813,7 @@ fn test_coalesce_partitions_after_projection() -> Result<()> {
         ],
         coalesce_partitions,
     )?);
-    let initial = get_plan_string(&projection).join("\n");
+    let initial = displayable(projection.as_ref()).indent(true).to_string();
     let actual = initial.trim();
 
     assert_snapshot!(
@@ -818,7 +828,9 @@ fn test_coalesce_partitions_after_projection() -> Result<()> {
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let after_optimize_string = displayable(after_optimize.as_ref())
+        .indent(true)
+        .to_string();
     let actual = after_optimize_string.trim();
 
     assert_snapshot!(
@@ -850,7 +862,7 @@ fn test_filter_after_projection() -> Result<()> {
         )),
     ));
     let filter = Arc::new(FilterExec::try_new(predicate, csv)?);
-    let projection = Arc::new(ProjectionExec::try_new(
+    let projection: Arc<dyn ExecutionPlan> = Arc::new(ProjectionExec::try_new(
         vec![
             (Arc::new(Column::new("a", 0)), "a_new".to_string()),
             (Arc::new(Column::new("b", 1)), "b".to_string()),
@@ -859,7 +871,7 @@ fn test_filter_after_projection() -> Result<()> {
         filter.clone(),
     )?) as _;
 
-    let initial = get_plan_string(&projection).join("\n");
+    let initial = displayable(projection.as_ref()).indent(true).to_string();
     let actual = initial.trim();
 
     assert_snapshot!(
@@ -874,7 +886,9 @@ fn test_filter_after_projection() -> Result<()> {
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let after_optimize_string = displayable(after_optimize.as_ref())
+        .indent(true)
+        .to_string();
     let actual = after_optimize_string.trim();
 
     assert_snapshot!(
@@ -943,7 +957,7 @@ fn test_join_after_projection() -> Result<()> {
         None,
         StreamJoinPartitionMode::SinglePartition,
     )?);
-    let projection = Arc::new(ProjectionExec::try_new(
+    let projection: Arc<dyn ExecutionPlan> = Arc::new(ProjectionExec::try_new(
         vec![
             (Arc::new(Column::new("c", 2)), "c_from_left".to_string()),
             (Arc::new(Column::new("b", 1)), "b_from_left".to_string()),
@@ -953,7 +967,7 @@ fn test_join_after_projection() -> Result<()> {
         ],
         join,
     )?) as _;
-    let initial = get_plan_string(&projection).join("\n");
+    let initial = displayable(projection.as_ref()).indent(true).to_string();
     let actual = initial.trim();
 
     assert_snapshot!(
@@ -969,7 +983,9 @@ fn test_join_after_projection() -> Result<()> {
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let after_optimize_string = displayable(after_optimize.as_ref())
+        .indent(true)
+        .to_string();
     let actual = after_optimize_string.trim();
 
     assert_snapshot!(
@@ -1066,7 +1082,7 @@ fn test_join_after_required_projection() -> Result<()> {
         None,
         StreamJoinPartitionMode::SinglePartition,
     )?);
-    let projection = Arc::new(ProjectionExec::try_new(
+    let projection: Arc<dyn ExecutionPlan> = Arc::new(ProjectionExec::try_new(
         vec![
             (Arc::new(Column::new("a", 5)), "a".to_string()),
             (Arc::new(Column::new("b", 6)), "b".to_string()),
@@ -1081,7 +1097,7 @@ fn test_join_after_required_projection() -> Result<()> {
         ],
         join,
     )?) as _;
-    let initial = get_plan_string(&projection).join("\n");
+    let initial = displayable(projection.as_ref()).indent(true).to_string();
     let actual = initial.trim();
 
     assert_snapshot!(
@@ -1097,7 +1113,9 @@ fn test_join_after_required_projection() -> Result<()> {
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let after_optimize_string = displayable(after_optimize.as_ref())
+        .indent(true)
+        .to_string();
     let actual = after_optimize_string.trim();
 
     assert_snapshot!(
@@ -1154,11 +1172,11 @@ fn test_nested_loop_join_after_projection() -> Result<()> {
         None,
     )?) as _;
 
-    let projection = Arc::new(ProjectionExec::try_new(
+    let projection: Arc<dyn ExecutionPlan> = Arc::new(ProjectionExec::try_new(
         vec![(col_left_c, "c".to_string())],
         Arc::clone(&join),
     )?) as _;
-    let initial = get_plan_string(&projection).join("\n");
+    let initial = displayable(projection.as_ref()).indent(true).to_string();
     let actual = initial.trim();
     assert_snapshot!(
         actual,
@@ -1172,7 +1190,9 @@ fn test_nested_loop_join_after_projection() -> Result<()> {
 
     let after_optimize_string =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
-    let after_optimize_string = get_plan_string(&after_optimize_string).join("\n");
+    let after_optimize_string = displayable(after_optimize_string.as_ref())
+        .indent(true)
+        .to_string();
     let actual = after_optimize_string.trim();
     assert_snapshot!(
         actual,
@@ -1241,7 +1261,7 @@ fn test_hash_join_after_projection() -> Result<()> {
         PartitionMode::Auto,
         NullEquality::NullEqualsNull,
     )?);
-    let projection = Arc::new(ProjectionExec::try_new(
+    let projection: Arc<dyn ExecutionPlan> = Arc::new(ProjectionExec::try_new(
         vec![
             (Arc::new(Column::new("c", 2)), "c_from_left".to_string()),
             (Arc::new(Column::new("b", 1)), "b_from_left".to_string()),
@@ -1250,7 +1270,7 @@ fn test_hash_join_after_projection() -> Result<()> {
         ],
         join.clone(),
     )?) as _;
-    let initial = get_plan_string(&projection).join("\n");
+    let initial = displayable(projection.as_ref()).indent(true).to_string();
     let actual = initial.trim();
     assert_snapshot!(
         actual,
@@ -1264,7 +1284,9 @@ fn test_hash_join_after_projection() -> Result<()> {
 
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
-    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let after_optimize_string = displayable(after_optimize.as_ref())
+        .indent(true)
+        .to_string();
     let actual = after_optimize_string.trim();
 
     // HashJoinExec only returns result after projection. Because there are some alias columns in the projection, the ProjectionExec is not removed.
@@ -1290,7 +1312,9 @@ fn test_hash_join_after_projection() -> Result<()> {
 
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
-    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let after_optimize_string = displayable(after_optimize.as_ref())
+        .indent(true)
+        .to_string();
     let actual = after_optimize_string.trim();
 
     // Comparing to the previous result, this projection don't have alias columns either change the order of output fields. So the ProjectionExec is removed.
@@ -1320,7 +1344,7 @@ fn test_repartition_after_projection() -> Result<()> {
             6,
         ),
     )?);
-    let projection = Arc::new(ProjectionExec::try_new(
+    let projection: Arc<dyn ExecutionPlan> = Arc::new(ProjectionExec::try_new(
         vec![
             (Arc::new(Column::new("b", 1)), "b_new".to_string()),
             (Arc::new(Column::new("a", 0)), "a".to_string()),
@@ -1328,7 +1352,7 @@ fn test_repartition_after_projection() -> Result<()> {
         ],
         repartition,
     )?) as _;
-    let initial = get_plan_string(&projection).join("\n");
+    let initial = displayable(projection.as_ref()).indent(true).to_string();
     let actual = initial.trim();
     assert_snapshot!(
         actual,
@@ -1342,7 +1366,9 @@ fn test_repartition_after_projection() -> Result<()> {
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let after_optimize_string = displayable(after_optimize.as_ref())
+        .indent(true)
+        .to_string();
     let actual = after_optimize_string.trim();
     assert_snapshot!(
         actual,
@@ -1388,7 +1414,7 @@ fn test_sort_after_projection() -> Result<()> {
         .into(),
         csv,
     );
-    let projection = Arc::new(ProjectionExec::try_new(
+    let projection: Arc<dyn ExecutionPlan> = Arc::new(ProjectionExec::try_new(
         vec![
             (Arc::new(Column::new("c", 2)), "c".to_string()),
             (Arc::new(Column::new("a", 0)), "new_a".to_string()),
@@ -1397,7 +1423,7 @@ fn test_sort_after_projection() -> Result<()> {
         Arc::new(sort_exec),
     )?) as _;
 
-    let initial = get_plan_string(&projection).join("\n");
+    let initial = displayable(projection.as_ref()).indent(true).to_string();
     let actual = initial.trim();
     assert_snapshot!(
         actual,
@@ -1411,7 +1437,9 @@ fn test_sort_after_projection() -> Result<()> {
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let after_optimize_string = displayable(after_optimize.as_ref())
+        .indent(true)
+        .to_string();
     let actual = after_optimize_string.trim();
     assert_snapshot!(
         actual,
@@ -1440,7 +1468,7 @@ fn test_sort_preserving_after_projection() -> Result<()> {
         .into(),
         csv,
     );
-    let projection = Arc::new(ProjectionExec::try_new(
+    let projection: Arc<dyn ExecutionPlan> = Arc::new(ProjectionExec::try_new(
         vec![
             (Arc::new(Column::new("c", 2)), "c".to_string()),
             (Arc::new(Column::new("a", 0)), "new_a".to_string()),
@@ -1449,7 +1477,7 @@ fn test_sort_preserving_after_projection() -> Result<()> {
         Arc::new(sort_exec),
     )?) as _;
 
-    let initial = get_plan_string(&projection).join("\n");
+    let initial = displayable(projection.as_ref()).indent(true).to_string();
     let actual = initial.trim();
     assert_snapshot!(
         actual,
@@ -1463,7 +1491,9 @@ fn test_sort_preserving_after_projection() -> Result<()> {
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let after_optimize_string = displayable(after_optimize.as_ref())
+        .indent(true)
+        .to_string();
     let actual = after_optimize_string.trim();
     assert_snapshot!(
         actual,
@@ -1481,7 +1511,7 @@ fn test_sort_preserving_after_projection() -> Result<()> {
 fn test_union_after_projection() -> Result<()> {
     let csv = create_simple_csv_exec();
     let union = Arc::new(UnionExec::new(vec![csv.clone(), csv.clone(), csv]));
-    let projection = Arc::new(ProjectionExec::try_new(
+    let projection: Arc<dyn ExecutionPlan> = Arc::new(ProjectionExec::try_new(
         vec![
             (Arc::new(Column::new("c", 2)), "c".to_string()),
             (Arc::new(Column::new("a", 0)), "new_a".to_string()),
@@ -1490,7 +1520,7 @@ fn test_union_after_projection() -> Result<()> {
         union.clone(),
     )?) as _;
 
-    let initial = get_plan_string(&projection).join("\n");
+    let initial = displayable(projection.as_ref()).indent(true).to_string();
     let actual = initial.trim();
     assert_snapshot!(
         actual,
@@ -1506,7 +1536,9 @@ fn test_union_after_projection() -> Result<()> {
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let after_optimize_string = displayable(after_optimize.as_ref())
+        .indent(true)
+        .to_string();
     let actual = after_optimize_string.trim();
     assert_snapshot!(
         actual,
@@ -1550,7 +1582,7 @@ fn test_partition_col_projection_pushdown() -> Result<()> {
     let source = partitioned_data_source();
     let partitioned_schema = source.schema();
 
-    let projection = Arc::new(ProjectionExec::try_new(
+    let projection: Arc<dyn ExecutionPlan> = Arc::new(ProjectionExec::try_new(
         vec![
             (
                 col("string_col", partitioned_schema.as_ref())?,
@@ -1571,7 +1603,9 @@ fn test_partition_col_projection_pushdown() -> Result<()> {
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let after_optimize_string = displayable(after_optimize.as_ref())
+        .indent(true)
+        .to_string();
     let actual = after_optimize_string.trim();
     assert_snapshot!(
         actual,
@@ -1589,7 +1623,7 @@ fn test_partition_col_projection_pushdown_expr() -> Result<()> {
     let source = partitioned_data_source();
     let partitioned_schema = source.schema();
 
-    let projection = Arc::new(ProjectionExec::try_new(
+    let projection: Arc<dyn ExecutionPlan> = Arc::new(ProjectionExec::try_new(
         vec![
             (
                 col("string_col", partitioned_schema.as_ref())?,
@@ -1615,7 +1649,9 @@ fn test_partition_col_projection_pushdown_expr() -> Result<()> {
     let after_optimize =
         ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?;
 
-    let after_optimize_string = get_plan_string(&after_optimize).join("\n");
+    let after_optimize_string = displayable(after_optimize.as_ref())
+        .indent(true)
+        .to_string();
     let actual = after_optimize_string.trim();
     assert_snapshot!(
         actual,