diff --git a/datafusion/physical-expr/src/simplifier/const_evaluator.rs b/datafusion/physical-expr/src/simplifier/const_evaluator.rs index 1e62e47ce2066..1f3781c537dd5 100644 --- a/datafusion/physical-expr/src/simplifier/const_evaluator.rs +++ b/datafusion/physical-expr/src/simplifier/const_evaluator.rs @@ -39,18 +39,80 @@ use crate::expressions::{Column, Literal}; /// - `1 + 2` -> `3` /// - `(1 + 2) * 3` -> `9` (with bottom-up traversal) /// - `'hello' || ' world'` -> `'hello world'` +#[deprecated( + since = "53.0.0", + note = "This function will be removed in a future release in favor of a private implementation that depends on other implementation details. Please open an issue if you have a use case for keeping it." +)] pub fn simplify_const_expr( expr: Arc, ) -> Result>> { - simplify_const_expr_with_dummy(expr, &create_dummy_batch()?) + let batch = create_dummy_batch()?; + // If expr is already a const literal or can't be evaluated into one. + if expr.as_any().is::() || (!can_evaluate_as_constant(&expr)) { + return Ok(Transformed::no(expr)); + } + + // Evaluate the expression + match expr.evaluate(&batch) { + Ok(ColumnarValue::Scalar(scalar)) => { + Ok(Transformed::yes(Arc::new(Literal::new(scalar)))) + } + Ok(ColumnarValue::Array(arr)) if arr.len() == 1 => { + // Some operations return an array even for scalar inputs + let scalar = ScalarValue::try_from_array(&arr, 0)?; + Ok(Transformed::yes(Arc::new(Literal::new(scalar)))) + } + Ok(_) => { + // Unexpected result - keep original expression + Ok(Transformed::no(expr)) + } + Err(_) => { + // On error, keep original expression + // The expression might succeed at runtime due to short-circuit evaluation + // or other runtime conditions + Ok(Transformed::no(expr)) + } + } } -pub(crate) fn simplify_const_expr_with_dummy( +/// Simplify expressions whose immediate children are all literals. +/// +/// This function only checks the direct children of the expression, +/// not the entire subtree. It is designed to be used with bottom-up tree +/// traversal, where children are simplified before parents. +/// +/// # Example transformations +/// - `1 + 2` -> `3` +/// - `(1 + 2) * 3` -> `9` (with bottom-up traversal, inner expr simplified first) +/// - `'hello' || ' world'` -> `'hello world'` +pub(crate) fn simplify_const_expr_immediate( expr: Arc, batch: &RecordBatch, ) -> Result>> { - // If expr is already a const literal or can't be evaluated into one. - if expr.as_any().is::() || (!can_evaluate_as_constant(&expr)) { + // Already a literal - nothing to do + if expr.as_any().is::() { + return Ok(Transformed::no(expr)); + } + + // Column references cannot be evaluated at plan time + if expr.as_any().is::() { + return Ok(Transformed::no(expr)); + } + + // Volatile nodes cannot be evaluated at plan time + if expr.is_volatile_node() { + return Ok(Transformed::no(expr)); + } + + // Since transform visits bottom-up, children have already been simplified. + // If all children are now Literals, this node can be const-evaluated. + // This is O(k) where k = number of children, instead of O(subtree). + let all_children_literal = expr + .children() + .iter() + .all(|child| child.as_any().is::()); + + if !all_children_literal { return Ok(Transformed::no(expr)); } @@ -77,6 +139,20 @@ pub(crate) fn simplify_const_expr_with_dummy( } } +/// Create a 1-row dummy RecordBatch for evaluating constant expressions. +/// +/// The batch is never actually accessed for data - it's just needed because +/// the PhysicalExpr::evaluate API requires a RecordBatch. For expressions +/// that only contain literals, the batch content is irrelevant. +/// +/// This is the same approach used in the logical expression `ConstEvaluator`. +pub(crate) fn create_dummy_batch() -> Result { + // RecordBatch requires at least one column + let dummy_schema = Arc::new(Schema::new(vec![Field::new("_", DataType::Null, true)])); + let col = new_null_array(&DataType::Null, 1); + Ok(RecordBatch::try_new(dummy_schema, vec![col])?) +} + fn can_evaluate_as_constant(expr: &Arc) -> bool { let mut can_evaluate = true; @@ -93,21 +169,11 @@ fn can_evaluate_as_constant(expr: &Arc) -> bool { can_evaluate } -/// Create a 1-row dummy RecordBatch for evaluating constant expressions. -/// -/// The batch is never actually accessed for data - it's just needed because -/// the PhysicalExpr::evaluate API requires a RecordBatch. For expressions -/// that only contain literals, the batch content is irrelevant. -/// -/// This is the same approach used in the logical expression `ConstEvaluator`. -pub(crate) fn create_dummy_batch() -> Result { - // RecordBatch requires at least one column - let dummy_schema = Arc::new(Schema::new(vec![Field::new("_", DataType::Null, true)])); - let col = new_null_array(&DataType::Null, 1); - Ok(RecordBatch::try_new(dummy_schema, vec![col])?) -} - /// Check if this expression has any column references. +#[deprecated( + since = "53.0.0", + note = "This function isn't used internally and is trivial to implement, therefore it will be removed in a future release." +)] pub fn has_column_references(expr: &Arc) -> bool { let mut has_columns = false; expr.apply(|expr| { diff --git a/datafusion/physical-expr/src/simplifier/mod.rs b/datafusion/physical-expr/src/simplifier/mod.rs index 45ead82a0a93d..3f3f8573449eb 100644 --- a/datafusion/physical-expr/src/simplifier/mod.rs +++ b/datafusion/physical-expr/src/simplifier/mod.rs @@ -24,9 +24,7 @@ use std::sync::Arc; use crate::{ PhysicalExpr, simplifier::{ - const_evaluator::{create_dummy_batch, simplify_const_expr_with_dummy}, - not::simplify_not_expr, - unwrap_cast::unwrap_cast_in_comparison, + const_evaluator::create_dummy_batch, unwrap_cast::unwrap_cast_in_comparison, }, }; @@ -67,10 +65,11 @@ impl<'a> PhysicalExprSimplifier<'a> { // Apply NOT expression simplification first, then unwrap cast optimization, // then constant expression evaluation - let rewritten = simplify_not_expr(node, schema)? + #[expect(deprecated, reason = "`simplify_not_expr` is marked as deprecated until it's made private.")] + let rewritten = not::simplify_not_expr(node, schema)? .transform_data(|node| unwrap_cast_in_comparison(node, schema))? .transform_data(|node| { - simplify_const_expr_with_dummy(node, &batch) + const_evaluator::simplify_const_expr_immediate(node, &batch) })?; #[cfg(debug_assertions)] diff --git a/datafusion/physical-expr/src/simplifier/not.rs b/datafusion/physical-expr/src/simplifier/not.rs index ea5467d0a4b42..709260aa48791 100644 --- a/datafusion/physical-expr/src/simplifier/not.rs +++ b/datafusion/physical-expr/src/simplifier/not.rs @@ -43,6 +43,10 @@ use crate::expressions::{BinaryExpr, InListExpr, Literal, NotExpr, in_list, lit} /// This function applies a single simplification rule and returns. When used with /// TreeNodeRewriter, multiple passes will automatically be applied until no more /// transformations are possible. +#[deprecated( + since = "53.0.0", + note = "This function will be made private in a future release, please file an issue if you have a reason for keeping it public." +)] pub fn simplify_not_expr( expr: Arc, schema: &Schema,