Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
109 commits
Select commit Hold shift + click to select a range
424b73d
Refactor: split test_window_partial_constant_and_set_monotonicity int…
alamb Oct 15, 2025
a61a9c2
fix: Ensure ListingTable partitions are pruned when filters are not u…
peasee Oct 15, 2025
41fdab9
Push Down Filter Subexpressions in Nested Loop Joins as Projections (…
tobixdev Oct 15, 2025
264030c
feat: support Spark `concat` string function (#18063)
comphead Oct 15, 2025
4153adf
Add independent configs for topk/join dynamic filter (#18090)
xudong963 Oct 16, 2025
3bca1bb
Adds Trace and Summary to CLI instrumented stores (#18064)
BlakeOrth Oct 16, 2025
ec3ca71
fix: Improve null handling in array_to_string function (#18076)
Weijun-H Oct 16, 2025
c8e0f1c
feat: update .asf.yaml configuration settings (#18027)
Weijun-H Oct 16, 2025
0a57e01
Fix extended tests on main to get CI green (#18096)
alamb Oct 16, 2025
9bfa2ae
chore(deps): bump taiki-e/install-action from 2.62.29 to 2.62.31 (#18…
dependabot[bot] Oct 16, 2025
b1723e5
chore: run extended suite on PRs for critical areas (#18088)
comphead Oct 16, 2025
4e03c92
refactor: add dialect enum (#18043)
dariocurr Oct 16, 2025
ea83c26
#17982 Make `nvl` a thin wrapper for `coalesce` (#17991)
pepijnve Oct 16, 2025
7c3b0d0
minor: fix incorrect deprecation version & window docs (#18093)
Jefffrey Oct 16, 2025
337378a
chore: use `NullBuffer::union` for Spark `concat` (#18087)
comphead Oct 16, 2025
cadf429
feat: support `null_treatment`, `distinct`, and `filter` for window f…
dqkqd Oct 17, 2025
c84e3cf
feat: Add percentile_cont aggregate function (#17988)
adriangb Oct 17, 2025
621a249
fix: Re-bump latest datafusion-testing module so extended tests succe…
Jefffrey Oct 17, 2025
ffe64e3
chore(deps): bump taiki-e/install-action from 2.62.31 to 2.62.33 (#18…
dependabot[bot] Oct 17, 2025
0ae9fdc
Adding hiop as known user (#18114)
enryls Oct 17, 2025
a9ecd68
chore: remove unnecessary `skip_failed_rules` config in slt (#18117)
Jefffrey Oct 17, 2025
fe95505
move repartition to insta (#18106)
blaginin Oct 17, 2025
3272ebe
refactor: move ListingTable over to the catalog-listing-table crate (…
timsaucer Oct 17, 2025
8e1d13a
refactor: move arrow datasource to new `datafusion-datasource-arrow` …
timsaucer Oct 17, 2025
dce59f8
Adds instrumentation to LIST operations in CLI (#18103)
BlakeOrth Oct 17, 2025
7605023
feat: spark udf array shuffle (#17674)
chenkovsky Oct 17, 2025
f0ab136
make Union::try_new pub (#18125)
leoyvens Oct 17, 2025
c956104
fix: window unparsing (#17367)
chenkovsky Oct 17, 2025
ec2402a
feat: Support configurable `EXPLAIN ANALYZE` detail level (#18098)
2010YOUY01 Oct 17, 2025
2222abd
refactor: remove unused `type_coercion/aggregate.rs` functions (#18091)
Jefffrey Oct 17, 2025
765f2b9
Add extra case_when benchmarks (#18097)
pepijnve Oct 17, 2025
7d294f1
fix: Add dictionary coercion support for numeric comparison operation…
ahmed-mez Oct 17, 2025
ec3d20b
Adds instrumentation to delimited LIST operations in CLI (#18134)
BlakeOrth Oct 17, 2025
522403b
feat: add fp16 support to Substrait (#18086)
westonpace Oct 17, 2025
1b001a1
fix(substrait): schema errors for Aggregates with no groupings (#17909)
vbarua Oct 17, 2025
5a074ea
Improve datafusion-cli object store profiling summary display (#18085)
alamb Oct 17, 2025
e323357
test: `to_timestamp(double)` for vectorized input (#18147)
dqkqd Oct 18, 2025
9079bbd
Fix `concat_elements_utf8view` capacity initialization. (#18003)
samueleresca Oct 18, 2025
0ddc82e
Use < instead of = in case benchmark predicates, use Integers (#18144)
pepijnve Oct 18, 2025
93f136c
Adds instrumentation to PUT ops in the CLI (#18139)
BlakeOrth Oct 18, 2025
28a6854
[main] chore: Fix `no space left on device` (#18141) (#18151)
alamb Oct 18, 2025
b98cad6
Fix `DISTINCT ON` for tables with no columns (ReplaceDistinctWithAggr…
Tpt Oct 19, 2025
f199b00
refactor: remove core crate from datafusion-proto (#18123)
timsaucer Oct 19, 2025
f198fc8
Fix quadratic runtime in min_max_bytes (#18044)
ctsk Oct 19, 2025
35b2e35
fix: `array_distinct` inner nullability causing type mismatch (#18104)
dqkqd Oct 20, 2025
7c215ed
Short circuit complex case evaluation modes as soon as possible (#17898)
pepijnve Oct 20, 2025
7f75e58
perf: Fix NLJ slow join with condition `array_has` (#18161)
2010YOUY01 Oct 20, 2025
5d23723
chore(deps): bump getrandom from 0.3.3 to 0.3.4 (#18163)
dependabot[bot] Oct 20, 2025
fcbbfa4
chore(deps): bump tokio from 1.47.1 to 1.48.0 (#18164)
dependabot[bot] Oct 20, 2025
5c19eed
chore(deps): bump indexmap from 2.11.4 to 2.12.0 (#18162)
dependabot[bot] Oct 20, 2025
b1deb1f
chore(deps): bump bzip2 from 0.6.0 to 0.6.1 (#18165)
dependabot[bot] Oct 20, 2025
a4acec3
fix: improve document ui (#18157)
getChan Oct 20, 2025
54fff60
perf: improve `ScalarValue::to_array_of_size` for Boolean and some nu…
rluvaton Oct 20, 2025
37aad28
Feat: Make current_time aware of execution timezone. (#18040)
codetyri0n Oct 20, 2025
1f434dc
feat: `ClassicJoin` for PWMJ (#17482)
jonathanc-n Oct 21, 2025
155b56e
fix(docs): resolve extra outline on tables (#18193)
foskey51 Oct 21, 2025
b5b7f9b
chore(deps): bump taiki-e/install-action from 2.62.33 to 2.62.34 (#18…
dependabot[bot] Oct 21, 2025
77a4cb7
Fix COPY TO does not produce an output file for the empty set (#18074)
bert-beyondloops Oct 21, 2025
347b2b6
Add Projection struct w/ helper methods to manipulate projections (#1…
adriangb Oct 21, 2025
8d54e7b
Add TableSchema helper to encapsulate file schema + partition fields …
adriangb Oct 21, 2025
1e30aed
Add spilling to RepartitionExec (#18014)
adriangb Oct 21, 2025
d5ea5e9
Adds DELETE and HEAD instrumentation to CLI (#18206)
BlakeOrth Oct 21, 2025
8054bb8
[branch-50] Prepare 50.3.0 release version number and README (#18173)…
alamb Oct 21, 2025
31109e4
feat(docs): display compatible logo for dark mode (#18197)
foskey51 Oct 21, 2025
6d52e54
Docs: Update SQL example for current_time() and current_date(). (#18200)
codetyri0n Oct 21, 2025
531af8e
feat: Add `deregister_object_store` (#17999)
jonathanc-n Oct 21, 2025
b7a10ad
fix: Use dynamic timezone in now() function for accurate timestamp (#…
Weijun-H Oct 22, 2025
114beec
Fix array_has simplification with null argument (#18186)
joroKr21 Oct 22, 2025
4aceda0
chore(deps): bump taiki-e/install-action from 2.62.34 to 2.62.35 (#18…
dependabot[bot] Oct 22, 2025
6ecf76c
bench: create benchmark for lookup table like `CASE WHEN` (#18203)
rluvaton Oct 22, 2025
774b6fe
Adds instrumentation to COPY operations in the CLI (#18227)
BlakeOrth Oct 22, 2025
47fd638
Consolidate core_integration/datasource and rename parquet_source -->…
alamb Oct 23, 2025
340834d
feat: Add existence join to NestedLoopJoin benchmarks (#18005)
jonathanc-n Oct 23, 2025
408e1e4
doc: Add `Metrics` section to the user-guide (#18216)
2010YOUY01 Oct 23, 2025
92c5607
fix: UnnestExec preserves relevant equivalence properties of input (#…
vegarsti Oct 23, 2025
be85bf4
CoalescePartitionsExec fetch is not consistent with one partition and…
zhuqi-lucas Oct 23, 2025
144f155
fix: wrong simplification for >= >, <= < (#18222)
chenkovsky Oct 23, 2025
d127973
Migrate core test to insta part 3 (#16978)
Chen-Yuan-Lai Oct 23, 2025
fef3b71
docs: Update HOWTOs for adding new functions (#18089)
Jefffrey Oct 24, 2025
1af3699
docs: fix trim for `rust,ignore` blocks (#18239)
Jefffrey Oct 24, 2025
9f23680
chore(deps): bump taiki-e/install-action from 2.62.35 to 2.62.36 (#18…
dependabot[bot] Oct 24, 2025
167baf7
Fix: Do not normalize table names when deserializing from protobuf (#…
drin Oct 24, 2025
665a552
Use TableSchema in FileScanConfig (#18231)
adriangb Oct 24, 2025
619123a
Revert "chore: revert tests (#18065)" (#18255)
dqkqd Oct 24, 2025
22c4214
Refactor `nvl2` Function to Support Lazy Evaluation and Simplificatio…
kosiew Oct 24, 2025
a8373d2
docs: refine `AggregateUDFImpl::is_ordered_set_aggregate` documentati…
Jefffrey Oct 24, 2025
82b1307
Enable placeholders with extension types (#17986)
paleolimbot Oct 24, 2025
987f333
fix: only fall back to listing prefixes on 404 errors (#18263)
colinmarc Oct 24, 2025
f4a49b5
feat(small): Set 'summary' level metrics for `DataSourceExec` with pa…
2010YOUY01 Oct 25, 2025
97b9029
fix null count stats computation (#18276)
adriangb Oct 25, 2025
eef1c9e
feat: be indifferent to padding when decoding base64 (#18264)
colinmarc Oct 25, 2025
2a82897
docs: fix broken SQL & DataFrame links in root README (#18153) (#18274)
manasa-manoj-nbr Oct 25, 2025
4309b85
Improve docs and examples for `DataTypeExt` and `FieldExt` (#18271)
alamb Oct 26, 2025
d072554
doc: Contributor guide for AI-generated PRs (#18237)
2010YOUY01 Oct 26, 2025
8142360
Easier construction of ScalarAndMetadata (#18272)
alamb Oct 26, 2025
2bb7bf6
doc: Add Join Physical Plan documentation, and configuration flag to …
jonathanc-n Oct 26, 2025
e2516e2
Implement `DESCRIBE SELECT` to show schema rather than `EXPLAIN` plan…
djanderson Oct 26, 2025
61d3543
Add integration test for IO operations for listing tables queries (#1…
alamb Oct 26, 2025
8f396b8
Push partition_statistics into DataSource (#18233)
adriangb Oct 27, 2025
4ecccde
feat: Add `output_bytes` to baseline metrics (#18268)
2010YOUY01 Oct 27, 2025
0a8f154
Fix: Error rather than silently ignore extra parameter passed to ceil…
toxicteddy00077 Oct 27, 2025
f870dcd
fix: Support Dictionary[Int32, Binary] for bitmap count spark functio…
kazantsev-maksim Oct 27, 2025
0daa88c
chore(deps): Update `half` to 2.7.1, ignore `RUSTSEC-2025-0111` (#18287)
alamb Oct 27, 2025
868c455
chore(deps): bump taiki-e/install-action from 2.62.36 to 2.62.38 (#18…
dependabot[bot] Oct 27, 2025
c09ca5f
"Gentle Introduction to Arrow / Record Batches" #11336 (#18051)
sm4rtm4art Oct 27, 2025
b817dcd
chore(deps): bump regex from 1.11.3 to 1.12.2 (#18294)
dependabot[bot] Oct 27, 2025
1feb80f
chore(deps): bump clap from 4.5.48 to 4.5.50 (#18292)
dependabot[bot] Oct 27, 2025
c6ad17c
Upgrade DataFusion to arrow/parquet 57.0.0 (#17888)
alamb Oct 27, 2025
440fb82
chore(deps): bump syn from 2.0.106 to 2.0.108 (#18291)
dependabot[bot] Oct 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Enable placeholders with extension types (apache#17986)
## Which issue does this PR close?

- Closes apache#17862

## Rationale for this change

Most logical plan expressions now propagate metadata; however,
parameters with extension types or other field metadata cannot
participate in placeholder/parameter binding.

## What changes are included in this PR?

The DataType in the Placeholder struct was replaced with a FieldRef
along with anything that stored the "DataType" of a parameter.

Strictly speaking one could bind parameters with an extension type by
copy/pasting the placeholder replacer, which I figured out towards the
end of this change. I still think this change makes sense and opens up
the door for things like handling UUID in SQL with full parameter
binding support.

## Are these changes tested?

Yes

## Are there any user-facing changes?

Yes, one new function was added to extract the placeholder fields from a
plan.

This is a breaking change for code that specifically interacts with the
pub fields of the modified structs (ParamValues, Placeholder, and
Prepare are the main ones).

---------

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
  • Loading branch information
paleolimbot and alamb authored Oct 24, 2025
commit 82b1307f311e700ec97c1a1da9135c9679407e62
107 changes: 107 additions & 0 deletions datafusion/common/src/datatype.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! [`DataTypeExt`] and [`FieldExt`] extension traits for converting DataTypes to Fields

use crate::arrow::datatypes::{DataType, Field, FieldRef};
use std::sync::Arc;

/// DataFusion extension methods for Arrow [`DataType`]
///
/// Both methods consume the [`DataType`] and wrap it in a field that is
/// marked nullable and whose name is "" (the empty string). They differ only
/// in whether the result is an owned [`Field`] or a shared [`FieldRef`].
pub trait DataTypeExt {
/// Convert the type to field with nullable type and "" name
///
/// This is used to track the places where we convert a [`DataType`]
/// into a nameless field to interact with an API that is
/// capable of representing an extension type and/or nullability.
///
/// Takes `self` by value, so the [`DataType`] is moved into the returned
/// [`Field`] rather than cloned.
fn into_nullable_field(self) -> Field;

/// Convert the type to field ref with nullable type and "" name
///
/// Concise wrapper around [`DataTypeExt::into_nullable_field`] that
/// constructs a [`FieldRef`], for callers that need the reference-counted
/// form of the same nullable, nameless field.
fn into_nullable_field_ref(self) -> FieldRef;
}

impl DataTypeExt for DataType {
    /// Builds a [`Field`] named "" that holds this type and allows nulls.
    fn into_nullable_field(self) -> Field {
        let nullable = true;
        Field::new("", self, nullable)
    }

    /// Same field as [`DataTypeExt::into_nullable_field`], placed behind an
    /// [`Arc`] so it can be used wherever a [`FieldRef`] is expected.
    fn into_nullable_field_ref(self) -> FieldRef {
        Arc::new(self.into_nullable_field())
    }
}

/// DataFusion extension methods for Arrow [`Field`]
///
/// Every method consumes `self` and returns a value of the same type
/// (`Self`), so the trait can be implemented for both owned and
/// reference-counted fields.
pub trait FieldExt {
/// Returns a new Field representing a List of this Field's DataType.
///
/// In the implementations in this module, this field is first given the
/// default list item name (see [`FieldExt::into_list_item`]) and the
/// resulting list field is nullable and named "".
fn into_list(self) -> Self;

/// Return a new Field representing this Field as the item type of a FixedSizeList
///
/// `list_size` is passed through unchanged as the size of the
/// `FixedSizeList` type. As with [`FieldExt::into_list`], the
/// implementations in this module rename this field to the default list
/// item name and return a nullable list field named "".
fn into_fixed_size_list(self, list_size: i32) -> Self;

/// Create a field with the default list field name ("item")
///
/// Note that lists are allowed to have an arbitrarily named field;
/// however, a name other than 'item' will cause it to fail an
/// == check against a more idiomatically created list in
/// arrow-rs which causes issues.
///
/// The implementations in this module return the field unchanged when it
/// already carries the default name.
fn into_list_item(self) -> Self;
}

impl FieldExt for Field {
    /// Wraps this field, renamed to the default list item name, in a
    /// nullable and nameless `List` field.
    fn into_list(self) -> Self {
        let item = Arc::new(self.into_list_item());
        let list_type = DataType::List(item);
        list_type.into_nullable_field()
    }

    /// Wraps this field, renamed to the default list item name, in a
    /// nullable and nameless `FixedSizeList` field holding `list_size` items.
    fn into_fixed_size_list(self, list_size: i32) -> Self {
        let item = Arc::new(self.into_list_item());
        let list_type = DataType::FixedSizeList(item, list_size);
        list_type.into_nullable_field()
    }

    /// Gives this field the default list item name, leaving it untouched
    /// when it already has that name.
    fn into_list_item(self) -> Self {
        // Nothing to rename: hand the field back as-is.
        if self.name() == Field::LIST_FIELD_DEFAULT_NAME {
            return self;
        }
        self.with_name(Field::LIST_FIELD_DEFAULT_NAME)
    }
}

impl FieldExt for Arc<Field> {
    /// Wraps this field, renamed to the default list item name, in a
    /// nullable and nameless `List` field and returns it behind a new [`Arc`].
    fn into_list(self) -> Self {
        let list_type = DataType::List(self.into_list_item());
        Arc::new(list_type.into_nullable_field())
    }

    /// Wraps this field, renamed to the default list item name, in a
    /// nullable and nameless `FixedSizeList` field holding `list_size` items
    /// and returns it behind a new [`Arc`].
    fn into_fixed_size_list(self, list_size: i32) -> Self {
        let list_type = DataType::FixedSizeList(self.into_list_item(), list_size);
        Arc::new(list_type.into_nullable_field())
    }

    /// Gives this field the default list item name, returning the very same
    /// [`Arc`] when the name already matches.
    fn into_list_item(self) -> Self {
        // Already correctly named: keep the existing allocation.
        if self.name() == Field::LIST_FIELD_DEFAULT_NAME {
            return self;
        }
        // `unwrap_or_clone` only clones the inner field if the Arc is shared.
        let renamed = Arc::unwrap_or_clone(self).with_name(Field::LIST_FIELD_DEFAULT_NAME);
        Arc::new(renamed)
    }
}
2 changes: 2 additions & 0 deletions datafusion/common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ pub mod alias;
pub mod cast;
pub mod config;
pub mod cse;
pub mod datatype;
pub mod diagnostic;
pub mod display;
pub mod encryption;
Expand All @@ -47,6 +48,7 @@ pub mod file_options;
pub mod format;
pub mod hash_utils;
pub mod instant;
pub mod metadata;
pub mod nested_struct;
mod null_equality;
pub mod parsers;
Expand Down
Loading