Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions vortex-duckdb/src/exporter/canonical.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors
use vortex::array::ArrayRef;
use vortex::array::Canonical;
use vortex::array::ExecutionCtx;
use vortex::array::arrays::TemporalArray;
use vortex::error::VortexResult;
use vortex::error::vortex_bail;

use crate::exporter::ColumnExporter;
use crate::exporter::ConversionCache;
use crate::exporter::all_invalid;
use crate::exporter::bool;
use crate::exporter::decimal;
use crate::exporter::fixed_size_list;
use crate::exporter::list_view;
use crate::exporter::primitive;
use crate::exporter::struct_;
use crate::exporter::temporal;
use crate::exporter::varbinview;

/// Builds a [`ColumnExporter`] for `array` by executing it into its [`Canonical`] form and
/// dispatching to the exporter for the resulting canonical variant.
///
/// `cache` is forwarded only to the nested exporters (list, fixed-size list, struct); the
/// remaining exporters receive just the canonical array and `ctx`.
///
/// # Errors
///
/// Propagates any error from canonical execution or from the selected exporter constructor.
/// Also fails for extension arrays that cannot be converted into a [`TemporalArray`], and for
/// variant arrays.
pub(crate) fn new_exporter(
    array: ArrayRef,
    cache: &ConversionCache,
    ctx: &mut ExecutionCtx,
) -> VortexResult<Box<dyn ColumnExporter>> {
    let canonical = array.execute::<Canonical>(ctx)?;

    match canonical {
        Canonical::Null(_) => Ok(all_invalid::new_exporter()),
        Canonical::Bool(bools) => bool::new_exporter(bools, ctx),
        Canonical::Primitive(primitives) => primitive::new_exporter(primitives, ctx),
        Canonical::Decimal(decimals) => decimal::new_exporter(decimals, ctx),
        Canonical::VarBinView(views) => varbinview::new_exporter(views, ctx),
        Canonical::List(lists) => list_view::new_exporter(lists, cache, ctx),
        Canonical::FixedSizeList(lists) => fixed_size_list::new_exporter(lists, cache, ctx),
        Canonical::Struct(structs) => struct_::new_exporter(structs, cache, ctx),
        // Temporal arrays are the only extension arrays with an exporter.
        Canonical::Extension(extension) => match TemporalArray::try_from(extension) {
            Ok(temporal_array) => temporal::new_exporter(temporal_array, ctx),
            Err(_) => {
                vortex_bail!("no non-temporal extension exporter")
            }
        },
        Canonical::Variant(_) => {
            vortex_bail!("Variant arrays can't be exported to DuckDB")
        }
    }
}
114 changes: 94 additions & 20 deletions vortex-duckdb/src/exporter/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,11 @@ mod tests {
use vortex::array::validity::Validity;
use vortex::buffer::Buffer;
use vortex::buffer::buffer;
use vortex::dtype::DType;
use vortex::dtype::PType;
use vortex::encodings::runend::RunEnd;
use vortex::error::VortexExpect;
use vortex::error::VortexResult;

use super::*;
use crate::SESSION;
Expand All @@ -160,13 +164,12 @@ mod tests {

#[test]
fn test_export_empty_list() {
let list = unsafe {
ListArray::new_unchecked(
Buffer::<u32>::empty().into_array(),
Buffer::<u32>::empty().into_array(),
Validity::AllValid,
)
}
let list = ListArray::try_new(
Buffer::<u32>::empty().into_array(),
buffer![0u32].into_array(),
Validity::AllValid,
)
.vortex_expect("list creation should succeed")
.into_array();

let list_type = LogicalType::list_type(LogicalType::uint32())
Expand All @@ -189,20 +192,91 @@ mod tests {
}

#[test]
fn test_export_non_empty_list_of_strings() {
let list = unsafe {
ListArray::new_unchecked(
<VarBinArray as FromIterator<_>>::from_iter([
Some("abc"),
Some("def"),
None,
Some("ghi"),
])
.into_array(),
buffer![0u8, 1, 2, 3, 4].into_array(),
Validity::from_iter([true, true, false, true]),
fn test_export_u64_list() {
let list = ListArray::try_new(
buffer![1u64, 2, 3, 4, 5].into_array(),
buffer![0u8, 1, 2, 3, 4, 5].into_array(),
Validity::AllValid,
)
.vortex_expect("list creation should succeed")
.into_array();
assert_eq!(
list.dtype(),
&DType::List(
Arc::new(DType::Primitive(PType::U64, false.into())),
true.into()
)
}
);

let list_type = LogicalType::list_type(LogicalType::uint64())
.vortex_expect("LogicalTypeRef creation should succeed for test data");
let mut chunk = DataChunk::new([list_type]);

let mut ctx = SESSION.create_execution_ctx();
new_array_exporter(list, &ConversionCache::default(), &mut ctx)
.unwrap()
.export(0, 5, chunk.get_vector_mut(0), &mut ctx)
.unwrap();
chunk.set_len(5);

assert_eq!(
format!("{}", String::try_from(&*chunk).unwrap()),
r#"Chunk - [1 Columns]
- FLAT UBIGINT[]: 5 = [ [1], [2], [3], [4], [5]]
"#
);
}

// Ensure a list whose elements are run-end encoded is exported as a flat list vector.
#[test]
fn test_export_list_with_runend_elements() -> VortexResult<()> {
    let mut ctx = SESSION.create_execution_ctx();

    // Five elements forming two runs: 100 twice, then 200 three times.
    let raw_values = buffer![100u32, 100, 200, 200, 200].into_array();
    let elements = RunEnd::encode(raw_values, &mut ctx)?;

    // Offsets [0, 2, 5] split the elements into two lists that line up with the runs.
    let list = ListArray::try_new(
        elements.into_array(),
        buffer![0u32, 2, 5].into_array(),
        Validity::AllValid,
    )
    .vortex_expect("list creation should succeed")
    .into_array();

    let list_type = LogicalType::list_type(LogicalType::uint32())
        .vortex_expect("LogicalTypeRef creation should succeed for test data");
    let mut chunk = DataChunk::new([list_type]);

    // Export both list rows into the first (and only) column of the chunk.
    let row_count = 2;
    new_array_exporter(list, &ConversionCache::default(), &mut ctx)?.export(
        0,
        row_count,
        chunk.get_vector_mut(0),
        &mut ctx,
    )?;
    chunk.set_len(row_count);

    let rendered = String::try_from(&*chunk)?;
    assert_eq!(
        rendered,
        r#"Chunk - [1 Columns]
- FLAT UINTEGER[]: 2 = [ [100, 100], [200, 200, 200]]
"#
    );

    Ok(())
}

#[test]
fn test_export_non_empty_list_of_strings() {
let list = ListArray::try_new(
<VarBinArray as FromIterator<_>>::from_iter([
Some("abc"),
Some("def"),
None,
Some("ghi"),
])
.into_array(),
buffer![0u8, 1, 2, 3, 4].into_array(),
Validity::from_iter([true, true, false, true]),
)
.vortex_expect("list creation should succeed")
.into_array();

let list_type = LogicalType::list_type(LogicalType::varchar())
Expand Down
26 changes: 3 additions & 23 deletions vortex-duckdb/src/exporter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
mod all_invalid;
mod bool;
mod cache;
mod canonical;
mod constant;
mod decimal;
mod dict;
Expand All @@ -22,13 +23,11 @@ mod vector;
pub use cache::ConversionCache;
pub use decimal::precision_to_duckdb_storage_size;
use vortex::array::ArrayRef;
use vortex::array::Canonical;
use vortex::array::ExecutionCtx;
use vortex::array::arrays::Constant;
use vortex::array::arrays::Dict;
use vortex::array::arrays::List;
use vortex::array::arrays::StructArray;
use vortex::array::arrays::TemporalArray;
use vortex::array::arrays::struct_::StructArrayExt;
use vortex::buffer::BitChunks;
use vortex::encodings::runend::RunEnd;
Expand Down Expand Up @@ -191,7 +190,7 @@ fn new_array_exporter_with_flatten(
};

let array = match array.try_downcast::<RunEnd>() {
Ok(array) => return run_end::new_exporter(array, cache, ctx),
Ok(array) => return run_end::new_exporter_with_flatten(array, cache, ctx, flatten),
Err(array) => array,
};

Expand All @@ -205,26 +204,7 @@ fn new_array_exporter_with_flatten(
Err(array) => array,
};

// Otherwise, we fall back to canonical
match array.execute::<Canonical>(ctx)? {
Canonical::Null(_) => Ok(all_invalid::new_exporter()),
Canonical::Bool(array) => bool::new_exporter(array, ctx),
Canonical::Primitive(array) => primitive::new_exporter(array, ctx),
Canonical::Decimal(array) => decimal::new_exporter(array, ctx),
Canonical::VarBinView(array) => varbinview::new_exporter(array, ctx),
Canonical::List(array) => list_view::new_exporter(array, cache, ctx),
Canonical::FixedSizeList(array) => fixed_size_list::new_exporter(array, cache, ctx),
Canonical::Struct(array) => struct_::new_exporter(array, cache, ctx),
Canonical::Extension(ext) => {
if let Ok(temporal_array) = TemporalArray::try_from(ext) {
return temporal::new_exporter(temporal_array, ctx);
}
vortex_bail!("no non-temporal extension exporter")
}
Canonical::Variant(_) => {
vortex_bail!("Variant arrays can't be exported to DuckDB")
}
}
canonical::new_exporter(array, cache, ctx)
}

/// Copy the sliced bits from source into target, returning whether all copied bits are zero,
Expand Down
11 changes: 10 additions & 1 deletion vortex-duckdb/src/exporter/run_end.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use std::marker::PhantomData;

use vortex::array::ArrayRef;
use vortex::array::ExecutionCtx;
use vortex::array::IntoArray;
use vortex::array::arrays::PrimitiveArray;
use vortex::array::match_each_integer_ptype;
use vortex::array::search_sorted::SearchSorted;
Expand All @@ -20,6 +21,7 @@ use crate::duckdb::SelectionVector;
use crate::duckdb::VectorRef;
use crate::exporter::ColumnExporter;
use crate::exporter::cache::ConversionCache;
use crate::exporter::canonical;
use crate::exporter::new_array_exporter;

/// We export run-end arrays to a DuckDB dictionary vector, using a selection vector to
Expand All @@ -32,11 +34,18 @@ struct RunEndExporter<E: IntegerPType> {
run_end_offset: usize,
}

pub(crate) fn new_exporter(
pub(crate) fn new_exporter_with_flatten(
array: RunEndArray,
cache: &ConversionCache,
ctx: &mut ExecutionCtx,
flatten: bool,
) -> VortexResult<Box<dyn ColumnExporter>> {
// Canonicalizing the array ourselves is faster than creating a dictionary vector and
// letting DuckDB flatten it for us.
if flatten {
return canonical::new_exporter(array.into_array(), cache, ctx);
}

let offset = array.offset();
let ends = array.ends().clone();
let values = array.values().clone();
Expand Down
Loading