Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 17 additions & 6 deletions vortex-compute/src/mask/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ use std::ops::BitAnd;
use vortex_dtype::NativePType;
use vortex_mask::Mask;
use vortex_vector::{
BoolVector, NullVector, PrimitiveVector, StructVector, Vector, match_each_pvector,
match_each_vector,
BoolVector, NullVector, PVector, PrimitiveVector, StructVector, VarBinType, VarBinVector,
Vector, match_each_pvector, match_each_vector,
};

/// Trait for masking the validity of an array or vector.
Expand Down Expand Up @@ -37,7 +37,8 @@ impl MaskValidity for NullVector {
impl MaskValidity for BoolVector {
fn mask_validity(self, mask: &Mask) -> Self {
let (bits, validity) = self.into_parts();
Self::new(bits, validity.bitand(mask))
// SAFETY: we are preserving the original bits buffer and only modifying the validity.
unsafe { Self::new_unchecked(bits, validity.bitand(mask)) }
}
}

Expand All @@ -47,16 +48,26 @@ impl MaskValidity for PrimitiveVector {
}
}

impl<T: NativePType> MaskValidity for vortex_vector::PVector<T> {
impl<T: NativePType> MaskValidity for PVector<T> {
fn mask_validity(self, mask: &Mask) -> Self {
let (data, validity) = self.into_parts();
Self::new(data, validity.bitand(mask))
// SAFETY: we are preserving the original data buffer and only modifying the validity.
unsafe { Self::new_unchecked(data, validity.bitand(mask)) }
}
}

impl<T: VarBinType> MaskValidity for VarBinVector<T> {
    /// Combines this vector's validity with `mask` via `bitand`, passing the views and
    /// buffers through unchanged.
    fn mask_validity(self, mask: &Mask) -> Self {
        let (views, buffers, current) = self.into_parts();
        let narrowed = current.bitand(mask);
        // SAFETY: the views and buffers are handed back exactly as `into_parts` returned them;
        // only the validity mask is replaced.
        unsafe { Self::new_unchecked(views, buffers, narrowed) }
    }
}

impl MaskValidity for StructVector {
fn mask_validity(self, mask: &Mask) -> Self {
let (fields, validity) = self.into_parts();
StructVector::new(fields, validity.bitand(mask))
// SAFETY: we are preserving the original fields and only modifying the validity.
unsafe { StructVector::new_unchecked(fields, validity.bitand(mask)) }
}
}
2 changes: 2 additions & 0 deletions vortex-vector/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,5 @@ vortex-buffer = { workspace = true }
vortex-dtype = { workspace = true }
vortex-error = { workspace = true }
vortex-mask = { workspace = true }

static_assertions = { workspace = true }
11 changes: 6 additions & 5 deletions vortex-vector/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
//! Immutable and mutable decompressed (canonical) vectors for Vortex.

#![deny(missing_docs)]
#![deny(clippy::missing_docs_in_private_items)]
#![deny(clippy::missing_errors_doc)]
#![deny(clippy::missing_panics_doc)]
#![deny(clippy::missing_safety_doc)]
Expand All @@ -15,11 +14,13 @@ mod bool;
mod null;
mod primitive;
mod struct_;
mod varbin;

pub use bool::{BoolVector, BoolVectorMut};
pub use null::{NullVector, NullVectorMut};
pub use primitive::{PVector, PVectorMut, PrimitiveVector, PrimitiveVectorMut};
pub use struct_::{StructVector, StructVectorMut};
pub use bool::*;
pub use null::*;
pub use primitive::*;
pub use struct_::*;
pub use varbin::*;

mod ops;
mod vector;
Expand Down
16 changes: 16 additions & 0 deletions vortex-vector/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ macro_rules! match_each_vector {
let $vec = v;
$body
}
$crate::Vector::String(v) => {
let $vec = v;
$body
}
$crate::Vector::Binary(v) => {
let $vec = v;
$body
}
$crate::Vector::Struct(v) => {
let $vec = v;
$body
Expand Down Expand Up @@ -104,6 +112,14 @@ macro_rules! match_each_vector_mut {
let $vec = v;
$body
}
$crate::VectorMut::String(v) => {
let $vec = v;
$body
}
$crate::VectorMut::Binary(v) => {
let $vec = v;
$body
}
$crate::VectorMut::Struct(v) => {
let $vec = v;
$body
Expand Down
3 changes: 3 additions & 0 deletions vortex-vector/src/private.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,8 @@ impl Sealed for PrimitiveVectorMut {}
impl<T: NativePType> Sealed for PVector<T> {}
impl<T: NativePType> Sealed for PVectorMut<T> {}

impl<T: VarBinType> Sealed for VarBinVector<T> {}
impl<T: VarBinType> Sealed for VarBinVectorMut<T> {}

impl Sealed for StructVector {}
impl Sealed for StructVectorMut {}
83 changes: 83 additions & 0 deletions vortex-vector/src/varbin/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

pub use types::*;
pub use vector::*;
pub use vector_mut::*;
use vortex_error::vortex_panic;

use crate::{Vector, VectorMut};

mod types;
mod vector;
mod vector_mut;
mod view;

/// Type alias for variable-length binary vectors holding raw bytes (not required to be UTF-8).
pub type BinaryVector = VarBinVector<BinaryType>;
/// Type alias for mutable variable-length binary vectors holding raw bytes (not required to be
/// UTF-8).
pub type BinaryVectorMut = VarBinVectorMut<BinaryType>;
/// Type alias for variable-length vectors of UTF-8 strings.
pub type StringVector = VarBinVector<StringType>;
/// Type alias for mutable variable-length vectors of UTF-8 strings.
pub type StringVectorMut = VarBinVectorMut<StringType>;

impl VarBinTypeDowncast for Vector {
    type Output<T: VarBinType> = VarBinVector<T>;

    /// Unwraps the `Binary` variant; any other variant is a caller bug and panics.
    fn into_binary(self) -> Self::Output<BinaryType> {
        match self {
            Vector::Binary(binary) => binary,
            other => vortex_panic!("Expected BinaryVector, got {other:?}"),
        }
    }

    /// Unwraps the `String` variant; any other variant is a caller bug and panics.
    fn into_string(self) -> Self::Output<StringType> {
        match self {
            Vector::String(string) => string,
            other => vortex_panic!("Expected StringVector, got {other:?}"),
        }
    }
}

impl VarBinTypeUpcast for Vector {
    type Input<T: VarBinType> = VarBinVector<T>;

    /// Wraps a binary vector in the `Binary` variant.
    fn from_binary(binary: Self::Input<BinaryType>) -> Self {
        Self::Binary(binary)
    }

    /// Wraps a string vector in the `String` variant.
    fn from_string(string: Self::Input<StringType>) -> Self {
        Self::String(string)
    }
}

impl VarBinTypeDowncast for VectorMut {
    type Output<T: VarBinType> = VarBinVectorMut<T>;

    /// Unwraps the `Binary` variant into a mutable binary vector.
    ///
    /// Panics if `self` is any other variant. The message names the mutable type that was
    /// expected so it is not confused with the immutable `Vector` downcast.
    fn into_binary(self) -> Self::Output<BinaryType> {
        match self {
            VectorMut::Binary(binary) => binary,
            other => vortex_panic!("Expected BinaryVectorMut, got {other:?}"),
        }
    }

    /// Unwraps the `String` variant into a mutable string vector.
    ///
    /// Panics if `self` is any other variant. The message names the mutable type that was
    /// expected so it is not confused with the immutable `Vector` downcast.
    fn into_string(self) -> Self::Output<StringType> {
        match self {
            VectorMut::String(string) => string,
            other => vortex_panic!("Expected StringVectorMut, got {other:?}"),
        }
    }
}

impl VarBinTypeUpcast for VectorMut {
    type Input<T: VarBinType> = VarBinVectorMut<T>;

    /// Wraps a mutable binary vector in the `Binary` variant.
    fn from_binary(binary: Self::Input<BinaryType>) -> Self {
        Self::Binary(binary)
    }

    /// Wraps a mutable string vector in the `String` variant.
    fn from_string(string: Self::Input<StringType>) -> Self {
        Self::String(string)
    }
}
93 changes: 93 additions & 0 deletions vortex-vector/src/varbin/types.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Variable-length binary types and related traits.

use std::fmt::Debug;

use crate::{VarBinVector, VarBinVectorMut, Vector, VectorMut};

impl<T: VarBinType> From<VarBinVector<T>> for Vector {
    /// Wraps a typed variable-length vector in the `Vector` variant selected by `T`.
    fn from(typed: VarBinVector<T>) -> Self {
        <T as VarBinType>::upcast::<Self>(typed)
    }
}

impl<T: VarBinType> From<VarBinVectorMut<T>> for VectorMut {
    /// Wraps a typed mutable variable-length vector in the `VectorMut` variant selected by `T`.
    fn from(typed: VarBinVectorMut<T>) -> Self {
        <T as VarBinType>::upcast::<Self>(typed)
    }
}

/// Marker trait for the element kinds supported by variable-length binary vectors.
///
/// The trait is sealed through a private supertrait; in this module it is implemented for
/// [`StringType`] and [`BinaryType`].
pub trait VarBinType: Debug + Sized + private::Sealed {
    /// The unsized slice type of a single value; it can always be viewed as bytes.
    type Slice: ?Sized + AsRef<[u8]>;

    /// Narrows the generic `visitor` to its representation specific to `Self`.
    fn downcast<V>(visitor: V) -> V::Output<Self>
    where
        V: VarBinTypeDowncast;

    /// Widens a `Self`-specific `input` into the generic type `V`.
    fn upcast<V>(input: V::Input<Self>) -> V
    where
        V: VarBinTypeUpcast;
}

/// [`VarBinType`] marker for UTF-8 strings.
#[derive(Clone, Debug)]
pub struct StringType;

impl VarBinType for StringType {
    type Slice = str;

    fn downcast<V>(visitor: V) -> V::Output<Self>
    where
        V: VarBinTypeDowncast,
    {
        V::into_string(visitor)
    }

    fn upcast<V>(input: V::Input<Self>) -> V
    where
        V: VarBinTypeUpcast,
    {
        <V as VarBinTypeUpcast>::from_string(input)
    }
}

/// [`VarBinType`] marker for raw binary data.
#[derive(Clone, Debug)]
pub struct BinaryType;

impl VarBinType for BinaryType {
    type Slice = [u8];

    fn downcast<V>(visitor: V) -> V::Output<Self>
    where
        V: VarBinTypeDowncast,
    {
        V::into_binary(visitor)
    }

    fn upcast<V>(input: V::Input<Self>) -> V
    where
        V: VarBinTypeUpcast,
    {
        <V as VarBinTypeUpcast>::from_binary(input)
    }
}

/// Implemented by generic containers that can be narrowed to one specific [`VarBinType`].
pub trait VarBinTypeDowncast {
    /// The type produced by narrowing to the variable binary type `T`.
    type Output<T: VarBinType>;

    /// Narrow to the [`BinaryType`] representation.
    fn into_binary(self) -> Self::Output<BinaryType>;
    /// Narrow to the [`StringType`] representation.
    fn into_string(self) -> Self::Output<StringType>;
}

/// Implemented by generic containers that can be built from a [`VarBinType`]-specific value.
pub trait VarBinTypeUpcast {
    /// The type accepted when widening from the variable binary type `T`.
    type Input<T: VarBinType>;

    /// Widen from the [`BinaryType`] representation.
    fn from_binary(input: Self::Input<BinaryType>) -> Self;
    /// Widen from the [`StringType`] representation.
    fn from_string(input: Self::Input<StringType>) -> Self;
}

/// Private module used to seal the [`VarBinType`] trait.
mod private {
    use super::{BinaryType, StringType};

    /// Supertrait of [`VarBinType`](super::VarBinType); it lives in a private module so code
    /// outside the crate cannot name it and therefore cannot implement `VarBinType`.
    pub trait Sealed {}

    impl Sealed for StringType {}
    impl Sealed for BinaryType {}
}
76 changes: 76 additions & 0 deletions vortex-vector/src/varbin/vector.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Variable-length binary vector implementation.

use std::sync::Arc;

use vortex_buffer::{Buffer, ByteBuffer};
use vortex_mask::Mask;

use crate::VectorOps;
use crate::varbin::VarBinType;
use crate::varbin::vector_mut::VarBinVectorMut;
use crate::varbin::view::BinaryView;

/// A variable-length binary vector.
///
/// The vector is made of a buffer of views, the byte buffers those views reference, and a
/// validity mask. The type parameter `T` selects the [`VarBinType`] of the elements.
#[derive(Debug, Clone)]
pub struct VarBinVector<T: VarBinType> {
    /// Views into the binary data; the number of views is the length of the vector.
    views: Buffer<BinaryView>,
    /// Buffers holding the referenced binary data.
    buffers: Arc<Box<[ByteBuffer]>>,
    /// Validity mask for the vector; expected to have the same length as `views`.
    validity: Mask,
    /// Marker tying the vector to its [`VarBinType`] without storing a value of `T`.
    _marker: std::marker::PhantomData<T>,
}

impl<T: VarBinType> VarBinVector<T> {
    /// Assembles a [`VarBinVector`] directly from its parts without any validation.
    ///
    /// # Safety
    ///
    /// None of the arguments are checked for consistency with each other.
    ///
    /// The caller must guarantee that:
    /// - The `validity` mask has the same length as the `views` buffer.
    /// - Every view in `views` correctly references data held in `buffers`.
    pub unsafe fn new_unchecked(
        views: Buffer<BinaryView>,
        buffers: Arc<Box<[ByteBuffer]>>,
        validity: Mask,
    ) -> Self {
        let _marker = std::marker::PhantomData;
        Self {
            views,
            buffers,
            validity,
            _marker,
        }
    }

    /// Consumes the vector and returns its views, data buffers and validity mask, in that order.
    pub fn into_parts(self) -> (Buffer<BinaryView>, Arc<Box<[ByteBuffer]>>, Mask) {
        let Self {
            views,
            buffers,
            validity,
            _marker: _,
        } = self;
        (views, buffers, validity)
    }
}

impl<T: VarBinType> VectorOps for VarBinVector<T> {
    type Mutable = VarBinVectorMut<T>;

    /// Returns the number of elements, which is the number of views.
    fn len(&self) -> usize {
        self.views.len()
    }

    /// Returns a reference to the validity mask of this vector.
    fn validity(&self) -> &Mask {
        &self.validity
    }

    /// Intended to convert this vector into its mutable counterpart.
    ///
    /// # Panics
    ///
    /// Not implemented yet: the body is `todo!()`, so every call currently panics.
    fn try_into_mut(self) -> Result<Self::Mutable, Self>
    where
        Self: Sized,
    {
        // TODO: implement the conversion; until then this unconditionally panics.
        todo!()
    }
}
Loading
Loading