From 93cb18eb3302b58b655a2c84399b85543d6e5976 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 9 Apr 2024 07:31:11 -0400 Subject: [PATCH 1/3] Document LogicalPlan tree node transformations --- datafusion/core/src/lib.rs | 10 +++++-- datafusion/expr/src/logical_plan/mod.rs | 2 +- datafusion/expr/src/logical_plan/plan.rs | 20 ++++++++++++- datafusion/expr/src/logical_plan/tree_node.rs | 29 ++++++++++++++----- 4 files changed, 49 insertions(+), 12 deletions(-) diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index c213f4554fb8b..b0e2b6fa9c091 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -296,11 +296,15 @@ //! A [`LogicalPlan`] is a Directed Acyclic Graph (DAG) of other //! [`LogicalPlan`]s, each potentially containing embedded [`Expr`]s. //! -//! [`Expr`]s can be rewritten using the [`TreeNode`] API and simplified using -//! [`ExprSimplifier`]. Examples of working with and executing `Expr`s can be found in the -//! [`expr_api`.rs] example +//! `LogicalPlan`s can be rewritten with the [`TreeNode`] API; see the +//! [`tree_node module`] for more details. +//! +//! [`Expr`]s can also be rewritten with the [`TreeNode`] API and simplified using +//! [`ExprSimplifier`]. Examples of working with and executing `Expr`s can be +//! found in the [`expr_api`.rs] example //! //! [`TreeNode`]: datafusion_common::tree_node::TreeNode +//! [`tree_node module`]: datafusion_expr::logical_plan::tree_node //! [`ExprSimplifier`]: crate::optimizer::simplify_expressions::ExprSimplifier //! [`expr_api`.rs]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/expr_api.rs //! 
diff --git a/datafusion/expr/src/logical_plan/mod.rs b/datafusion/expr/src/logical_plan/mod.rs index a1fe7a6f0a51e..034440643e515 100644 --- a/datafusion/expr/src/logical_plan/mod.rs +++ b/datafusion/expr/src/logical_plan/mod.rs @@ -22,7 +22,7 @@ pub mod dml; mod extension; mod plan; mod statement; -mod tree_node; +pub mod tree_node; pub use builder::{ build_join_schema, table_scan, union, wrap_projection_for_join_if_necessary, diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index ca8d718ec090e..0d8c99113483e 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -68,6 +68,11 @@ pub use datafusion_common::{JoinConstraint, JoinType}; /// an output relation (table) with a (potentially) different /// schema. A plan represents a dataflow tree where data flows /// from leaves up to the root to produce the query result. +/// +/// # See also: +/// * [`tree_node`]: visiting and rewriting API +/// +/// [`tree_node`]: crate::logical_plan::tree_node #[derive(Clone, PartialEq, Eq, Hash)] pub enum LogicalPlan { /// Evaluates an arbitrary list of expressions (essentially a @@ -238,7 +243,10 @@ impl LogicalPlan { } /// Returns all expressions (non-recursively) evaluated by the current - /// logical plan node. This does not include expressions in any children + /// logical plan node. This does not include expressions in any children. + /// + /// Note this method `clone`s all the expressions. When possible, the + /// [`tree_node`] API should be used instead of this API. /// /// The returned expressions do not necessarily represent or even /// contributed to the output schema of this node. For example, @@ -248,6 +256,8 @@ impl LogicalPlan { /// The expressions do contain all the columns that are used by this plan, /// so if there are columns not referenced by these expressions then /// DataFusion's optimizer attempts to optimize them away. 
+ /// + /// [`tree_node`]: crate::logical_plan::tree_node pub fn expressions(self: &LogicalPlan) -> Vec { let mut exprs = vec![]; self.apply_expressions(|e| { @@ -773,10 +783,16 @@ impl LogicalPlan { /// Returns a new `LogicalPlan` based on `self` with inputs and /// expressions replaced. /// + /// Note this method creates an entirely new node, which requires a large + /// amount of clone'ing. When possible, the [`tree_node`] API should be used + /// instead of this API. + /// /// The exprs correspond to the same order of expressions returned /// by [`Self::expressions`]. This function is used by optimizers /// to rewrite plans using the following pattern: /// + /// [`tree_node`]: crate::logical_plan::tree_node + /// /// ```text /// let new_inputs = optimize_children(..., plan, props); /// @@ -1352,6 +1368,7 @@ macro_rules! handle_transform_recursion_up { } impl LogicalPlan { + /// Visits a plan similarly to [`Self::visit`], but including embedded subqueries. pub fn visit_with_subqueries>( &self, visitor: &mut V, @@ -1365,6 +1382,7 @@ impl LogicalPlan { .visit_parent(|| visitor.f_up(self)) } + /// Rewrites a plan similarly to [`Self::rewrite`], but including embedded subqueries. pub fn rewrite_with_subqueries>( self, rewriter: &mut R, diff --git a/datafusion/expr/src/logical_plan/tree_node.rs b/datafusion/expr/src/logical_plan/tree_node.rs index ce26cac7970b8..c01fb0da16acc 100644 --- a/datafusion/expr/src/logical_plan/tree_node.rs +++ b/datafusion/expr/src/logical_plan/tree_node.rs @@ -15,17 +15,32 @@ // specific language governing permissions and limitations // under the License. -//! Tree node implementation for logical plan - +//! [`TreeNode`] based visiting and rewriting for [`LogicalPlan`]s +//! +//! Visiting (read only) APIs: +//! * [`LogicalPlan::visit`]: recursively visit the node and all of its inputs +//! * [`LogicalPlan::visit_with_subqueries`]: recursively visit the node and all of its inputs, including subqueries +//! 
* [`LogicalPlan::apply_children`]: recursively visit all inputs of this node +//! * [`LogicalPlan::apply_with_subqueries`]: recursively visit all inputs and embedded subqueries. +//! * [`LogicalPlan::apply_expressions`]: (non recursively) visit all expressions of this node +//! +//! Rewriting (update) APIs: +//! * [`LogicalPlan::rewrite`]: recursively rewrite the node and all of its inputs +//! * [`LogicalPlan::rewrite_with_subqueries`]: recursively rewrite the node and all of its inputs, including subqueries +//! * [`LogicalPlan::map_children`]: recursively rewrite all inputs of this node +//! * [`LogicalPlan::map_expressions`]: (non recursively) visit all expressions of this node +//! +//! (Re)creation APIs (these require substantial cloning and thus are slow): +//! * [`LogicalPlan::with_new_exprs`]: Create a new plan with different expressions +//! * [`LogicalPlan::expressions`]: Create a new plan with different expressions use crate::{ - Aggregate, Analyze, CreateMemoryTable, CreateView, CrossJoin, DdlStatement, Distinct, - DistinctOn, DmlStatement, Explain, Extension, Filter, Join, Limit, LogicalPlan, - Prepare, Projection, RecursiveQuery, Repartition, Sort, Subquery, SubqueryAlias, - Union, Unnest, Window, + dml::CopyTo, Aggregate, Analyze, CreateMemoryTable, CreateView, CrossJoin, + DdlStatement, Distinct, DistinctOn, DmlStatement, Explain, Extension, Filter, Join, + Limit, LogicalPlan, Prepare, Projection, RecursiveQuery, Repartition, Sort, Subquery, + SubqueryAlias, Union, Unnest, Window, }; use std::sync::Arc; -use crate::dml::CopyTo; use datafusion_common::tree_node::{ Transformed, TreeNode, TreeNodeIterator, TreeNodeRecursion, }; From bf3b8a4e66dbb5ab83ce834ea91c6b9e95af028c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 9 Apr 2024 10:14:52 -0400 Subject: [PATCH 2/3] Add exists --- datafusion/expr/src/logical_plan/tree_node.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/expr/src/logical_plan/tree_node.rs 
b/datafusion/expr/src/logical_plan/tree_node.rs index c01fb0da16acc..6dc7846dd7d2f 100644 --- a/datafusion/expr/src/logical_plan/tree_node.rs +++ b/datafusion/expr/src/logical_plan/tree_node.rs @@ -25,6 +25,7 @@ //! * [`LogicalPlan::apply_expressions`]: (non recursively) visit all expressions of this node //! //! Rewriting (update) APIs: +//! * [`LogicalPlan::exists`]: search for an expression in a plan //! * [`LogicalPlan::rewrite`]: recursively rewrite the node and all of its inputs //! * [`LogicalPlan::rewrite_with_subqueries`]: recursively rewrite the node and all of its inputs, including subqueries //! * [`LogicalPlan::map_children`]: recursively rewrite all inputs of this node From c79e0b908987c1a9eba9bf812f5e7ebc44a4b653 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 10 Apr 2024 05:42:52 -0400 Subject: [PATCH 3/3] touchups, add apply_subqueries, map_subqueries --- datafusion/expr/src/logical_plan/tree_node.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/datafusion/expr/src/logical_plan/tree_node.rs b/datafusion/expr/src/logical_plan/tree_node.rs index 6dc7846dd7d2f..415343f886854 100644 --- a/datafusion/expr/src/logical_plan/tree_node.rs +++ b/datafusion/expr/src/logical_plan/tree_node.rs @@ -21,19 +21,21 @@ //! * [`LogicalPlan::visit`]: recursively visit the node and all of its inputs //! * [`LogicalPlan::visit_with_subqueries`]: recursively visit the node and all of its inputs, including subqueries //! * [`LogicalPlan::apply_children`]: recursively visit all inputs of this node -//! * [`LogicalPlan::apply_with_subqueries`]: recursively visit all inputs and embedded subqueries. //! * [`LogicalPlan::apply_expressions`]: (non recursively) visit all expressions of this node +//! * [`LogicalPlan::apply_subqueries`]: (non recursively) visit all subqueries of this node +//! * [`LogicalPlan::apply_with_subqueries`]: recursively visit all inputs and embedded subqueries. //! //! Rewriting (update) APIs: //! 
* [`LogicalPlan::exists`]: search for an expression in a plan //! * [`LogicalPlan::rewrite`]: recursively rewrite the node and all of its inputs -//! * [`LogicalPlan::rewrite_with_subqueries`]: recursively rewrite the node and all of its inputs, including subqueries //! * [`LogicalPlan::map_children`]: recursively rewrite all inputs of this node //! * [`LogicalPlan::map_expressions`]: (non recursively) visit all expressions of this node +//! * [`LogicalPlan::map_subqueries`]: (non recursively) rewrite all subqueries of this node +//! * [`LogicalPlan::rewrite_with_subqueries`]: recursively rewrite the node and all of its inputs, including subqueries //! //! (Re)creation APIs (these require substantial cloning and thus are slow): //! * [`LogicalPlan::with_new_exprs`]: Create a new plan with different expressions -//! * [`LogicalPlan::expressions`]: Create a new plan with different expressions +//! * [`LogicalPlan::expressions`]: Return a copy of the plan's expressions use crate::{ dml::CopyTo, Aggregate, Analyze, CreateMemoryTable, CreateView, CrossJoin, DdlStatement, Distinct, DistinctOn, DmlStatement, Explain, Extension, Filter, Join,