apache · gabotechs · Jun 10, 2026 · Jun 4, 2026 · Jun 10, 2026
diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs
@@ -30,6 +30,7 @@ mod dfschema;
 mod functional_dependencies;
 mod join_type;
 mod param_value;
+mod partitioning;
 mod schema_reference;
 mod table_reference;
 mod unnest;
@@ -92,6 +93,7 @@ pub use join_type::{JoinConstraint, JoinSide, JoinType};
 pub use nested_struct::cast_column;
 pub use null_equality::NullEquality;
 pub use param_value::ParamValues;
+pub use partitioning::{SplitPoint, validate_range_split_points};
 pub use scalar::{ScalarType, ScalarValue};
 pub use schema_reference::SchemaReference;
 pub use spans::{Location, Span, Spans};

diff --git a/datafusion/common/src/partitioning.rs b/datafusion/common/src/partitioning.rs
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::utils::compare_rows;
+use crate::{Result, ScalarValue, error::_plan_err};
+use arrow::compute::SortOptions;
+use std::cmp::Ordering;
+use std::fmt::{self, Display};
+
+/// A boundary between adjacent range partitions.
+///
+/// A split point is a tuple with one [`ScalarValue`] per partitioning
+/// expression, compared lexicographically under the ordering of the range
+/// partitioning that owns it. `N` split points define `N + 1` partitions:
+///
+/// ```text
+/// partition 0: key < split_points[0]
+/// partition 1: split_points[0] <= key < split_points[1]
+/// ...
+/// partition N - 1: split_points[N - 2] <= key < split_points[N - 1]
+/// partition N: split_points[N - 1] <= key
+/// ```
+///
+/// Values equal to split point `i` belong to partition `i + 1`, so interior
+/// partitions are lower-inclusive and upper-exclusive. Note that the derived
+/// `PartialOrd` compares the stored values directly; it does not apply the
+/// sort options of the owning range partitioning.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
+pub struct SplitPoint {
+    values: Vec<ScalarValue>,
+}
+
+impl SplitPoint {
+    /// Builds a split point that takes ownership of the given tuple values.
+    pub fn new(values: Vec<ScalarValue>) -> Self {
+        SplitPoint { values }
+    }
+
+    /// Borrows the tuple values as a slice, in the order they were supplied.
+    pub fn values(&self) -> &[ScalarValue] {
+        self.values.as_slice()
+    }
+}
+
+impl Display for SplitPoint {
+    /// Renders the tuple as `(v0, v1, ...)`: each value's `Display` form,
+    /// separated by `", "` and wrapped in parentheses.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let mut rendered = Vec::with_capacity(self.values.len());
+        for value in &self.values {
+            rendered.push(value.to_string());
+        }
+        write!(f, "({})", rendered.join(", "))
+    }
+}
+
+/// Checks range split points against the ordering described by `sort_options`.
+/// Returns a plan error if any split point's width differs from the ordering
+/// width, or if adjacent split points are not strictly ordered.
+pub fn validate_range_split_points(
+    split_points: &[SplitPoint],
+    sort_options: &[SortOptions],
+) -> Result<()> {
+    let width = sort_options.len();
+    let mismatch = split_points
+        .iter()
+        .map(|split_point| split_point.values().len())
+        .enumerate()
+        .find(|&(_, split_point_width)| split_point_width != width);
+    if let Some((idx, split_point_width)) = mismatch {
+        return _plan_err!(
+            "Range partitioning split point {idx} has width {split_point_width}, but ordering has width {width}"
+        );
+    }
+
+    for (idx, pair) in split_points.windows(2).enumerate() {
+        let (lower, upper) = (&pair[0], &pair[1]);
+        let order = compare_rows(lower.values(), upper.values(), sort_options)?;
+        if order != Ordering::Less {
+            return _plan_err!(
+                "Range partitioning split points must be strictly ordered: split point {idx} ({}) must be less than split point {} ({})",
+                lower,
+                idx + 1,
+                upper
+            );
+        }
+    }
+
+    Ok(())
+}
diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs
@@ -98,7 +98,7 @@ use datafusion_physical_expr::aggregate::{
 };
 use datafusion_physical_expr::expressions::Literal;
 use datafusion_physical_expr::{
-    LexOrdering, PhysicalSortExpr, create_physical_sort_exprs,
+    LexOrdering, PhysicalSortExpr, RangePartitioning, create_physical_sort_exprs,
 };
 use datafusion_physical_optimizer::PhysicalOptimizerRule;
 use datafusion_physical_plan::empty::EmptyExec;
@@ -1264,6 +1264,22 @@ impl DefaultPhysicalPlanner {
                             .collect::<Result<Vec<_>>>()?;
                         Partitioning::Hash(runtime_expr, *n)
                     }
+                    LogicalPartitioning::Range(range) => {
+                        let sort_exprs = create_physical_sort_exprs(
+                            range.ordering(),
+                            input_dfschema,
+                            execution_props,
+                        )?;
+                        let ordering = LexOrdering::new(sort_exprs).ok_or_else(|| {
+                            internal_datafusion_err!(
+                                "Range repartitioning requires non-empty ordering"
+                            )
+                        })?;
+                        Partitioning::Range(RangePartitioning::try_new(
+                            ordering,
+                            range.split_points().to_vec(),
+                        )?)
+                    }
                     LogicalPartitioning::DistributeBy(_) => {
                         return not_impl_err!(
                             "Physical plan does not support DistributeBy partitioning"
@@ -3245,8 +3261,8 @@ mod tests {
     use arrow_schema::{FieldRef, SchemaRef};
     use datafusion_common::config::ConfigOptions;
     use datafusion_common::{
-        DFSchemaRef, ScalarValue, TableReference, ToDFSchema as _, assert_batches_eq,
-        assert_contains,
+        DFSchemaRef, ScalarValue, SplitPoint, TableReference, ToDFSchema as _,
+        assert_batches_eq, assert_contains,
     };
     use datafusion_execution::TaskContext;
     use datafusion_execution::runtime_env::RuntimeEnv;
@@ -3255,8 +3271,8 @@ mod tests {
     use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
     use datafusion_expr::{
         Accumulator, AggregateUDF, AggregateUDFImpl, ExprFunctionExt, LogicalPlanBuilder,
-        Signature, TableSource, UserDefinedLogicalNodeCore, Volatility,
-        WindowFunctionDefinition, col, lit,
+        RangePartitioning, Signature, TableSource, UserDefinedLogicalNodeCore,
+        Volatility, WindowFunctionDefinition, col, lit,
     };
     use datafusion_functions_aggregate::count::{count_all, count_udaf};
     use datafusion_functions_aggregate::expr_fn::sum;
@@ -3304,6 +3320,46 @@ mod tests {
         Field::new(name, DataType::Int64, nullable)
     }
 
+    /// Planning a logical range repartition with one split point should yield
+    /// a `RepartitionExec` whose target and output partitioning have two partitions.
+    #[tokio::test]
+    async fn logical_range_repartition_plans_output_partitioning() -> Result<()> {
+        let column: ArrayRef = Arc::new(Int32Array::from(vec![1]));
+        let batch = RecordBatch::try_from_iter(vec![("a", column)])?;
+        let table = MemTable::try_new(batch.schema(), vec![vec![batch]])?;
+        let source = Arc::new(DefaultTableSource::new(Arc::new(table)));
+        let range_partitioning = RangePartitioning::try_new(
+            vec![col("a").sort(true, true)],
+            vec![SplitPoint::new(vec![ScalarValue::Int32(Some(10))])],
+        )?;
+        let logical_plan = LogicalPlanBuilder::scan("test", source, None)?
+            .repartition(LogicalPartitioning::Range(range_partitioning))?
+            .build()?;
+
+        let session_state = make_session_state();
+        let physical_plan = DefaultPhysicalPlanner::default()
+            .create_initial_plan(&logical_plan, &session_state)
+            .await?;
+        let Some(repartition) = physical_plan.as_ref().downcast_ref::<RepartitionExec>()
+        else {
+            return internal_err!(
+                "expected RepartitionExec, got {}",
+                physical_plan.name()
+            );
+        };
+        let target = repartition.partitioning();
+        let Partitioning::Range(range) = target else {
+            return internal_err!(
+                "expected Range target partitioning, got {:?}",
+                target
+            );
+        };
+        assert_eq!(range.partition_count(), 2);
+        assert_eq!(physical_plan.output_partitioning().partition_count(), 2);
+
+        Ok(())
+    }
+
     #[test]
     fn test_create_window_expr_unwraps_alias_with_metadata() -> Result<()> {
         use std::collections::HashMap;

diff --git a/datafusion/expr/src/logical_plan/display.rs b/datafusion/expr/src/logical_plan/display.rs
@@ -515,6 +515,23 @@ impl<'a, 'b> PgJsonVisitor<'a, 'b> {
                         "Partitioning Key": hash_expr
                     })
                 }
+                Partitioning::Range(range) => {
+                    let range_expr: Vec<String> =
+                        range.ordering().iter().map(|e| format!("{e}")).collect();
+                    let split_points: Vec<String> = range
+                        .split_points()
+                        .iter()
+                        .map(|e| format!("{e}"))
+                        .collect();
+
+                    json!({
+                        "Node Type": "Repartition",
+                        "Partitioning Scheme": "Range",
+                        "Partition Count": range.partition_count(),
+                        "Partitioning Key": range_expr,
+                        "Split Points": split_points
+                    })
+                }
                 Partitioning::DistributeBy(expr) => {
                     let dist_by_expr: Vec<String> =
                         expr.iter().map(|e| format!("{e}")).collect();

diff --git a/datafusion/expr/src/logical_plan/mod.rs b/datafusion/expr/src/logical_plan/mod.rs
@@ -41,9 +41,9 @@ pub use plan::{
     Aggregate, Analyze, ColumnUnnestList, DescribeTable, Distinct, DistinctOn,
     EmptyRelation, Explain, ExplainOption, Extension, FetchType, Filter, Join,
     JoinConstraint, JoinType, Limit, LogicalPlan, Partitioning, PlanType, Projection,
-    RecursiveQuery, Repartition, SkipType, Sort, StringifiedPlan, Subquery,
-    SubqueryAlias, TableScan, TableScanBuilder, ToStringifiedPlan, Union, Unnest, Values,
-    Window, projection_schema,
+    RangePartitioning, RecursiveQuery, Repartition, SkipType, Sort, StringifiedPlan,
+    Subquery, SubqueryAlias, TableScan, TableScanBuilder, ToStringifiedPlan, Union,
+    Unnest, Values, Window, projection_schema,
 };
 pub use statement::{
     Deallocate, Execute, Prepare, ResetVariable, SetVariable, Statement,