From 4d41a02fd6248a4ff8c9913876c056bc8fe500f1 Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Tue, 24 Feb 2026 15:03:52 -0500
Subject: [PATCH] Avoid creating new RecordBatches to simplify expressions

---
 .../src/simplifier/const_evaluator.rs         | 26 ++++++++++++++-----
 .../physical-expr/src/simplifier/mod.rs       |  2 +-
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/datafusion/physical-expr/src/simplifier/const_evaluator.rs b/datafusion/physical-expr/src/simplifier/const_evaluator.rs
index 1f3781c537dd5..148a9bdcec80d 100644
--- a/datafusion/physical-expr/src/simplifier/const_evaluator.rs
+++ b/datafusion/physical-expr/src/simplifier/const_evaluator.rs
@@ -23,7 +23,7 @@ use arrow::array::new_null_array;
 use arrow::datatypes::{DataType, Field, Schema};
 use arrow::record_batch::RecordBatch;
 use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion};
-use datafusion_common::{Result, ScalarValue};
+use datafusion_common::{Result, ScalarValue, internal_datafusion_err};
 use datafusion_expr_common::columnar_value::ColumnarValue;
 
 use crate::PhysicalExpr;
@@ -53,7 +53,7 @@ pub fn simplify_const_expr(
     }
 
     // Evaluate the expression
-    match expr.evaluate(&batch) {
+    match expr.evaluate(batch) {
         Ok(ColumnarValue::Scalar(scalar)) => {
             Ok(Transformed::yes(Arc::new(Literal::new(scalar))))
         }
@@ -146,11 +146,23 @@ pub(crate) fn simplify_const_expr_immediate(
 /// that only contain literals, the batch content is irrelevant.
 ///
 /// This is the same approach used in the logical expression `ConstEvaluator`.
-pub(crate) fn create_dummy_batch() -> Result<RecordBatch> {
-    // RecordBatch requires at least one column
-    let dummy_schema = Arc::new(Schema::new(vec![Field::new("_", DataType::Null, true)]));
-    let col = new_null_array(&DataType::Null, 1);
-    Ok(RecordBatch::try_new(dummy_schema, vec![col])?)
+pub(crate) fn create_dummy_batch() -> Result<&'static RecordBatch> {
+    static DUMMY_BATCH: std::sync::OnceLock<Result<RecordBatch>> =
+        std::sync::OnceLock::new();
+    DUMMY_BATCH
+        .get_or_init(|| {
+            // RecordBatch requires at least one column
+            let dummy_schema =
+                Arc::new(Schema::new(vec![Field::new("_", DataType::Null, true)]));
+            let col = new_null_array(&DataType::Null, 1);
+            Ok(RecordBatch::try_new(dummy_schema, vec![col])?)
+        })
+        .as_ref()
+        .map_err(|e| {
+            internal_datafusion_err!(
+                "Failed to create dummy batch for constant expression evaluation: {e}"
+            )
+        })
 }
 
 fn can_evaluate_as_constant(expr: &Arc<dyn PhysicalExpr>) -> bool {
diff --git a/datafusion/physical-expr/src/simplifier/mod.rs b/datafusion/physical-expr/src/simplifier/mod.rs
index 3f3f8573449eb..0c2b52b4e63d7 100644
--- a/datafusion/physical-expr/src/simplifier/mod.rs
+++ b/datafusion/physical-expr/src/simplifier/mod.rs
@@ -69,7 +69,7 @@ impl<'a> PhysicalExprSimplifier<'a> {
                 let rewritten = not::simplify_not_expr(node, schema)?
                     .transform_data(|node| unwrap_cast_in_comparison(node, schema))?
                     .transform_data(|node| {
-                        const_evaluator::simplify_const_expr_immediate(node, &batch)
+                        const_evaluator::simplify_const_expr_immediate(node, batch)
                     })?;
 
                 #[cfg(debug_assertions)]