Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
171 changes: 167 additions & 4 deletions vortex-array/src/scalar/scalar_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,19 @@ impl Scalar {
DType::Utf8(_) => value.as_utf8().is_empty(),
DType::Binary(_) => value.as_binary().is_empty(),
DType::List(..) => value.as_list().is_empty(),
// TODO(connor): This seems wrong...
DType::FixedSizeList(_, list_size, _) => value.as_list().len() == *list_size as usize,
// TODO(connor): This also seems wrong...
DType::Struct(struct_fields, _) => value.as_list().len() == struct_fields.nfields(),
// A fixed-size list is zero only if it has the expected number of elements and every
// element is itself a non-null zero value.
DType::FixedSizeList(_, list_size, _) => {
let list = self.as_list();
list.len() == *list_size as usize
&& (0..list.len())
.all(|i| list.element(i).is_some_and(|e| e.is_zero() == Some(true)))
}
// A struct is zero only if every one of its fields is itself a non-null zero value.
DType::Struct(..) => self
.as_struct()
.fields_iter()
.is_some_and(|mut fields| fields.all(|f| f.is_zero() == Some(true))),
Comment on lines +191 to +203

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The definitions here seem really weird to me, I think its worth documenting the "why" here.
How do other systems handle this?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure why they seem weird? I feel like this is the only possible correct definition for these scalars.

@AdamGS AdamGS Jun 19, 2026

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my mind a "zero" struct is an empty struct (as in - 0 fields) of length 0

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But this is a scalar, which means it's already length 1. And I would think of this as is_default almost where we do not have control over the struct fields, we just call is_default on all fields (via auto-derived Default impl of the struct).

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More importantly, we have to implement the functions in vortex-array/src/scalar/scalar_value.rs somehow based on the dtype, and the is_ functions have to mirror that.

DType::Union(..) => todo!("TODO(connor)[Union]: unimplemented"),
DType::Variant(_) => self.as_variant().is_zero()?,
DType::Extension(_) => self.as_extension().to_storage_scalar().is_zero()?,
Expand Down Expand Up @@ -428,3 +437,157 @@ fn partial_cmp_struct_values(

Some(Ordering::Equal)
}

#[cfg(test)]
mod tests {
use std::sync::Arc;

use rstest::rstest;

use crate::dtype::DType;
use crate::dtype::Nullability;
use crate::dtype::PType;
use crate::dtype::StructFields;
use crate::scalar::Scalar;

fn i32_scalar(value: i32) -> Scalar {
Scalar::primitive::<i32>(value, Nullability::NonNullable)
}

fn nullable_i32(value: Option<i32>) -> Scalar {
match value {
Some(value) => Scalar::primitive::<i32>(value, Nullability::Nullable),
None => Scalar::null(DType::Primitive(PType::I32, Nullability::Nullable)),
}
}

fn ab_struct_dtype(nullability: Nullability) -> DType {
DType::Struct(
StructFields::new(
["a", "b"].into(),
vec![
DType::Primitive(PType::I32, Nullability::NonNullable),
DType::Utf8(Nullability::NonNullable),
],
),
nullability,
)
}

#[rstest]
// A fixed-size list of all-zero elements is itself zero.
#[case(vec![0, 0], Some(true))]
#[case(vec![0], Some(true))]
// A single non-zero element makes the whole list non-zero. On `develop` these incorrectly
// returned `Some(true)` because only the element count was checked.
#[case(vec![0, 5], Some(false))]
#[case(vec![5, 0], Some(false))]
#[case(vec![1, 2], Some(false))]
fn fixed_size_list_is_zero(#[case] values: Vec<i32>, #[case] expected: Option<bool>) {
let element_dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
let children: Vec<Scalar> = values.into_iter().map(i32_scalar).collect();
let scalar = Scalar::fixed_size_list(element_dtype, children, Nullability::NonNullable);
assert_eq!(scalar.is_zero(), expected);
}

#[test]
fn null_fixed_size_list_is_zero_is_none() {
let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable));
let scalar = Scalar::null(DType::FixedSizeList(
element_dtype,
2,
Nullability::Nullable,
));
assert_eq!(scalar.is_zero(), None);
}

#[test]
fn fixed_size_list_with_null_element_is_not_zero() {
// A non-null fixed-size list containing a null element is not a zero value. On `develop`
// this incorrectly returned `Some(true)`.
let element_dtype = DType::Primitive(PType::I32, Nullability::Nullable);
let children = vec![nullable_i32(Some(0)), nullable_i32(None)];
let scalar = Scalar::fixed_size_list(element_dtype, children, Nullability::NonNullable);
assert_eq!(scalar.is_zero(), Some(false));
}

#[test]
fn struct_with_all_zero_fields_is_zero() {
let scalar = Scalar::struct_(
ab_struct_dtype(Nullability::NonNullable),
vec![i32_scalar(0), Scalar::utf8("", Nullability::NonNullable)],
);
assert_eq!(scalar.is_zero(), Some(true));
}

#[rstest]
// A non-zero primitive field, a non-empty string field, or both, make the struct non-zero. On
// `develop` all of these incorrectly returned `Some(true)`.
#[case(5, "")]
#[case(0, "x")]
#[case(7, "y")]
fn struct_with_non_zero_field_is_not_zero(#[case] a: i32, #[case] b: &str) {
let scalar = Scalar::struct_(
ab_struct_dtype(Nullability::NonNullable),
vec![i32_scalar(a), Scalar::utf8(b, Nullability::NonNullable)],
);
assert_eq!(scalar.is_zero(), Some(false));
}

#[test]
fn null_struct_is_zero_is_none() {
let scalar = Scalar::null(ab_struct_dtype(Nullability::Nullable));
assert_eq!(scalar.is_zero(), None);
}

#[test]
fn struct_with_null_field_is_not_zero() {
// A non-null struct with a null field is not a zero value. On `develop` this incorrectly
// returned `Some(true)`.
let dtype = DType::Struct(
StructFields::new(
["a", "b"].into(),
vec![
DType::Primitive(PType::I32, Nullability::Nullable),
DType::Primitive(PType::I32, Nullability::Nullable),
],
),
Nullability::NonNullable,
);
let scalar = Scalar::struct_(dtype, vec![nullable_i32(Some(0)), nullable_i32(None)]);
assert_eq!(scalar.is_zero(), Some(false));
}

#[test]
fn nested_struct_of_fixed_size_list_recurses() {
// Zero-checking must recurse through both structs and fixed-size lists. On `develop` the
// non-zero case incorrectly returned `Some(true)`.
let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable));
let fsl_dtype =
DType::FixedSizeList(Arc::clone(&element_dtype), 2, Nullability::NonNullable);
let struct_dtype = DType::Struct(
StructFields::new(["fsl"].into(), vec![fsl_dtype]),
Nullability::NonNullable,
);

let all_zero = Scalar::struct_(
struct_dtype.clone(),
vec![Scalar::fixed_size_list(
Arc::clone(&element_dtype),
vec![i32_scalar(0), i32_scalar(0)],
Nullability::NonNullable,
)],
);
assert_eq!(all_zero.is_zero(), Some(true));

let with_non_zero = Scalar::struct_(
struct_dtype,
vec![Scalar::fixed_size_list(
element_dtype,
vec![i32_scalar(0), i32_scalar(9)],
Nullability::NonNullable,
)],
);
assert_eq!(with_non_zero.is_zero(), Some(false));
}
}
Loading