From 39efea421e27ff4b4d8d45db8986f95073ba242a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 5 Oct 2022 17:55:46 -0400 Subject: [PATCH] Remove some uneeded code in `CommonSubexprEliminate` --- .../optimizer/src/common_subexpr_eliminate.rs | 74 +++---------------- 1 file changed, 9 insertions(+), 65 deletions(-) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index cea5e8c46eb5e..db3d2ca8dcda7 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -95,9 +95,7 @@ fn optimize( alias, }) => { let input_schema = Arc::clone(input.schema()); - let all_schemas: Vec = - plan.all_schemas().into_iter().cloned().collect(); - let arrays = to_arrays(expr, input_schema, all_schemas, &mut expr_set)?; + let arrays = to_arrays(expr, input_schema, &mut expr_set)?; let (mut new_expr, new_input) = rewrite_expr( &[expr], @@ -116,17 +114,8 @@ fn optimize( } LogicalPlan::Filter(Filter { predicate, input }) => { let input_schema = Arc::clone(input.schema()); - let all_schemas: Vec = - plan.all_schemas().into_iter().cloned().collect(); - let mut id_array = vec![]; - expr_to_identifier( - predicate, - &mut expr_set, - &mut id_array, - input_schema, - all_schemas, - )?; + expr_to_identifier(predicate, &mut expr_set, &mut id_array, input_schema)?; let (mut new_expr, new_input) = rewrite_expr( &[&[predicate.clone()]], @@ -153,10 +142,7 @@ fn optimize( schema, }) => { let input_schema = Arc::clone(input.schema()); - let all_schemas: Vec = - plan.all_schemas().into_iter().cloned().collect(); - let arrays = - to_arrays(window_expr, input_schema, all_schemas, &mut expr_set)?; + let arrays = to_arrays(window_expr, input_schema, &mut expr_set)?; let (mut new_expr, new_input) = rewrite_expr( &[window_expr], @@ -179,16 +165,9 @@ fn optimize( schema, }) => { let input_schema = Arc::clone(input.schema()); - let all_schemas: Vec = - plan.all_schemas().into_iter().cloned().collect(); - let group_arrays = to_arrays( - group_expr, - Arc::clone(&input_schema), - all_schemas.clone(), - &mut expr_set, - )?; - let aggr_arrays = - to_arrays(aggr_expr, input_schema, all_schemas, &mut expr_set)?; + let group_arrays = + to_arrays(group_expr, Arc::clone(&input_schema), &mut expr_set)?; + let aggr_arrays = to_arrays(aggr_expr, input_schema, &mut expr_set)?; let (mut new_expr, new_input) = rewrite_expr( &[group_expr, aggr_expr], @@ -210,9 +189,7 @@ fn optimize( } LogicalPlan::Sort(Sort { expr, input, fetch }) => { let input_schema = Arc::clone(input.schema()); - let all_schemas: Vec = - plan.all_schemas().into_iter().cloned().collect(); - let arrays = to_arrays(expr, input_schema, all_schemas, &mut expr_set)?; + let arrays = to_arrays(expr, input_schema, &mut expr_set)?; let (mut new_expr, new_input) = rewrite_expr( &[expr], @@ -271,19 +248,12 @@ fn pop_expr(new_expr: &mut Vec>) -> Result> { fn to_arrays( expr: &[Expr], input_schema: DFSchemaRef, - all_schemas: Vec, expr_set: &mut ExprSet, ) -> Result>> { expr.iter() .map(|e| { let mut id_array = vec![]; - expr_to_identifier( - e, - expr_set, - &mut id_array, - Arc::clone(&input_schema), - all_schemas.clone(), - )?; + expr_to_identifier(e, expr_set, &mut id_array, Arc::clone(&input_schema))?; Ok(id_array) }) @@ -394,13 +364,6 @@ struct ExprIdentifierVisitor<'a> { /// input schema for the node that we're optimizing, so we can determine the correct datatype /// for each subexpression input_schema: DFSchemaRef, - /// all schemas in the logical plan, as a fall back if we cannot resolve an expression type - /// from the input schema alone - // This fallback should never be necessary as the expression datatype should always be - // resolvable from the input schema of the node that's being optimized. - // todo: This can likely be removed if we are sure it's safe to do so. - all_schemas: Vec, - // inner states visit_stack: Vec, /// increased in pre_visit, start from 0. @@ -478,23 +441,7 @@ impl ExpressionVisitor for ExprIdentifierVisitor<'_> { self.id_array[idx] = (self.series_number, desc.clone()); self.visit_stack.push(VisitRecord::ExprItem(desc.clone())); - let data_type = if let Ok(data_type) = expr.get_type(&self.input_schema) { - data_type - } else { - // Expression type could not be resolved in schema, fall back to all schemas. - // - // This fallback should never be necessary as the expression datatype should always be - // resolvable from the input schema of the node that's being optimized. - // todo: This else-branch can likely be removed if we are sure it's safe to do so. - let merged_schema = - self.all_schemas - .iter() - .fold(DFSchema::empty(), |mut lhs, rhs| { - lhs.merge(rhs); - lhs - }); - expr.get_type(&merged_schema)? - }; + let data_type = expr.get_type(&self.input_schema)?; self.expr_set .entry(desc) @@ -510,13 +457,11 @@ fn expr_to_identifier( expr_set: &mut ExprSet, id_array: &mut Vec<(usize, Identifier)>, input_schema: DFSchemaRef, - all_schemas: Vec, ) -> Result<()> { expr.accept(ExprIdentifierVisitor { expr_set, id_array, input_schema, - all_schemas, visit_stack: vec![], node_count: 0, series_number: 0, @@ -669,7 +614,6 @@ mod test { &mut HashMap::new(), &mut id_array, Arc::clone(&schema), - vec![schema], )?; let expected = vec![