From 2654b3a52813d8dfe474224800d754a8e741de3b Mon Sep 17 00:00:00 2001 From: Nemo Yu Date: Wed, 10 Jun 2026 17:17:08 -0400 Subject: [PATCH 1/2] feat(vortex-geo): Arrow import/export for the native Point type Register ArrowImportVTable/ArrowExportVTable for the Point extension type, mapping geoarrow.point fields (separated coordinates) to and from vortex.geo.point with the CRS carried through. Interleaved coordinates are rejected at field import. The CRS metadata conversion is shared with the WKB vtables, and the coordinate storage dtype now has a canonical constructor inverse to coordinate_dimension. Signed-off-by: Nemo Yu --- vortex-geo/src/extension/coordinate.rs | 70 ++++++ vortex-geo/src/extension/mod.rs | 29 +++ vortex-geo/src/extension/point.rs | 209 +++++++++++++---- vortex-geo/src/extension/wkb.rs | 25 +- vortex-geo/src/lib.rs | 311 +------------------------ vortex-geo/src/scalar_fn/distance.rs | 21 +- vortex-geo/src/test_harness.rs | 29 +++ vortex-geo/src/tests/mod.rs | 20 ++ vortex-geo/src/tests/point.rs | 191 +++++++++++++++ vortex-geo/src/tests/wkb.rs | 288 +++++++++++++++++++++++ 10 files changed, 807 insertions(+), 386 deletions(-) create mode 100644 vortex-geo/src/test_harness.rs create mode 100644 vortex-geo/src/tests/mod.rs create mode 100644 vortex-geo/src/tests/point.rs create mode 100644 vortex-geo/src/tests/wkb.rs diff --git a/vortex-geo/src/extension/coordinate.rs b/vortex-geo/src/extension/coordinate.rs index 5624886dbb5..9d35a511c09 100644 --- a/vortex-geo/src/extension/coordinate.rs +++ b/vortex-geo/src/extension/coordinate.rs @@ -14,6 +14,7 @@ use std::fmt::Display; use std::fmt::Formatter; +use geoarrow::datatypes::Dimension as GeoArrowDimension; use vortex_array::ArrayRef; use vortex_array::ExecutionCtx; use vortex_array::arrays::ExtensionArray; @@ -25,6 +26,7 @@ use vortex_array::dtype::DType; use vortex_array::dtype::FieldNames; use vortex_array::dtype::Nullability; use vortex_array::dtype::PType; +use vortex_array::dtype::StructFields; use vortex_array::scalar::Scalar; use vortex_error::VortexResult; use vortex_error::vortex_bail; @@ -63,6 +65,38 @@ impl Dimension { _ => vortex_bail!("not a valid GeoArrow coordinate dimension: {names:?}"), }) } + + /// The coordinate field names of this dimension, in GeoArrow order. + pub(crate) fn field_names(self) -> &'static [&'static str] { + match self { + Dimension::Xy => &["x", "y"], + Dimension::Xyz => &["x", "y", "z"], + Dimension::Xym => &["x", "y", "m"], + Dimension::Xyzm => &["x", "y", "z", "m"], + } + } +} + +impl From for Dimension { + fn from(dim: GeoArrowDimension) -> Self { + match dim { + GeoArrowDimension::XY => Dimension::Xy, + GeoArrowDimension::XYZ => Dimension::Xyz, + GeoArrowDimension::XYM => Dimension::Xym, + GeoArrowDimension::XYZM => Dimension::Xyzm, + } + } +} + +impl From for GeoArrowDimension { + fn from(dim: Dimension) -> Self { + match dim { + Dimension::Xy => GeoArrowDimension::XY, + Dimension::Xyz => GeoArrowDimension::XYZ, + Dimension::Xym => GeoArrowDimension::XYM, + Dimension::Xyzm => GeoArrowDimension::XYZM, + } + } } /// A decoded coordinate. `z`/`m` are `Some` iff the storage dimension includes them. @@ -122,6 +156,21 @@ pub(crate) fn coordinate_dimension(dtype: &DType) -> VortexResult { Dimension::from_field_names(fields.names()) } +/// The canonical storage dtype for `dim`: a `Struct` of non-nullable `f64` coordinate fields, +/// with `nullability` at the struct (per-point) level. Inverse of [`coordinate_dimension`]. +pub(crate) fn coordinate_storage_dtype(dim: Dimension, nullability: Nullability) -> DType { + let names = dim.field_names(); + let fields = std::iter::repeat_n( + DType::Primitive(PType::F64, Nullability::NonNullable), + names.len(), + ) + .collect::>(); + DType::Struct( + StructFields::new(FieldNames::from(names), fields), + nullability, + ) +} + /// Decode a [`Coordinate`] from a coordinate `Struct` scalar (`z`/`m` read iff /// present, so the same decoder serves every dimension). pub(crate) fn coordinate_from_struct(scalar: &Scalar) -> VortexResult { @@ -202,6 +251,7 @@ mod tests { use vortex_array::arrays::PrimitiveArray; use vortex_array::arrays::StructArray; use vortex_array::dtype::FieldNames; + use vortex_array::dtype::Nullability; use vortex_array::dtype::extension::ExtDType; use vortex_array::session::ArraySession; use vortex_array::validity::Validity; @@ -209,10 +259,30 @@ mod tests { use vortex_session::VortexSession; use super::Coordinate; + use super::Dimension; + use super::coordinate_dimension; + use super::coordinate_storage_dtype; use super::parse_storage; use crate::extension::GeoMetadata; use crate::extension::Point; + /// Each dimension round-trips through its field names and canonical storage dtype. + #[test] + fn storage_dtype_roundtrips_dimension() -> VortexResult<()> { + let cases = [ + (Dimension::Xy, ["x", "y"].as_slice()), + (Dimension::Xyz, ["x", "y", "z"].as_slice()), + (Dimension::Xym, ["x", "y", "m"].as_slice()), + (Dimension::Xyzm, ["x", "y", "z", "m"].as_slice()), + ]; + for (dim, names) in cases { + assert_eq!(dim.field_names(), names); + let dtype = coordinate_storage_dtype(dim, Nullability::NonNullable); + assert_eq!(coordinate_dimension(&dtype)?, dim); + } + Ok(()) + } + /// Display emits WKT, including `z`/`m` when present. #[test] fn display_is_wkt() { diff --git a/vortex-geo/src/extension/mod.rs b/vortex-geo/src/extension/mod.rs index d69dd239d14..cc3a1f6e532 100644 --- a/vortex-geo/src/extension/mod.rs +++ b/vortex-geo/src/extension/mod.rs @@ -6,7 +6,10 @@ mod point; mod wkb; use std::fmt::Display; +use std::sync::Arc; +use geoarrow::datatypes::Crs; +use geoarrow::datatypes::Metadata; pub use point::*; pub use wkb::*; @@ -30,6 +33,32 @@ impl Display for GeoMetadata { } } +/// The GeoArrow [`Metadata`] equivalent of `geo_metadata`. +pub(crate) fn geoarrow_metadata(geo_metadata: &GeoMetadata) -> Arc { + Arc::new(Metadata::new( + geo_metadata + .crs + .as_ref() + .map(|crs| Crs::from_unknown_crs_type(crs.to_string())) + .unwrap_or_default(), + None, + )) +} + +/// Recover [`GeoMetadata`] from GeoArrow metadata. +pub(crate) fn geo_metadata_from_arrow(metadata: &Metadata) -> GeoMetadata { + let crs = metadata.crs().crs_value().map(|value| { + // `Crs::from_unknown_crs_type` stores the user's string verbatim as a JSON string + // value, so prefer the raw string when available to round-trip cleanly. For other + // CRS encodings (PROJJSON object, etc.), fall back to the JSON-encoded form. + value + .as_str() + .map(str::to_string) + .unwrap_or_else(|| value.to_string()) + }); + GeoMetadata { crs } +} + #[cfg(test)] mod tests { use prost::Message; diff --git a/vortex-geo/src/extension/point.rs b/vortex-geo/src/extension/point.rs index 119de3a7e45..600082a1998 100644 --- a/vortex-geo/src/extension/point.rs +++ b/vortex-geo/src/extension/point.rs @@ -6,18 +6,47 @@ //! XYZM — tagged with [`GeoMetadata`] (CRS). `z` is an optional elevation and `m` an optional //! measure: an arbitrary per-point value such as distance along a route or a timestamp. +use arrow_array::ArrayRef as ArrowArrayRef; +use arrow_schema::DataType; +use arrow_schema::Field; +use arrow_schema::extension::ExtensionType; +use geoarrow::array::IntoArrow; +use geoarrow::array::PointArray; +use geoarrow::datatypes::CoordType; +use geoarrow::datatypes::PointType; use prost::Message; +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::arrays::ExtensionArray; +use vortex_array::arrays::extension::ExtensionArrayExt; +use vortex_array::arrow::ArrowExport; +use vortex_array::arrow::ArrowExportVTable; +use vortex_array::arrow::ArrowImport; +use vortex_array::arrow::ArrowImportVTable; +use vortex_array::arrow::ArrowSession; +use vortex_array::arrow::ArrowSessionExt; +use vortex_array::arrow::FromArrowArray; +use vortex_array::dtype::DType; use vortex_array::dtype::extension::ExtDType; use vortex_array::dtype::extension::ExtId; use vortex_array::dtype::extension::ExtVTable; use vortex_array::scalar::Scalar; use vortex_array::scalar::ScalarValue; use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_err; +use vortex_session::registry::CachedId; +use vortex_session::registry::Id; use super::GeoMetadata; use super::coordinate::Coordinate; +use super::coordinate::Dimension; use super::coordinate::coordinate_dimension; use super::coordinate::coordinate_from_struct; +use super::coordinate::coordinate_storage_dtype; +use super::geo_metadata_from_arrow; +use super::geoarrow_metadata; /// A single location: `geoarrow.point`, stored as `Struct` of non-nullable `f64`. #[derive(Debug, Clone, Default, PartialEq, Eq, Hash)] @@ -55,18 +84,143 @@ impl ExtVTable for Point { } } +static ARROW_POINT: CachedId = CachedId::new(PointType::NAME); + +/// The `geoarrow.point` extension type for `dimension`, with separated (struct) coordinates +/// matching `Point` storage. +fn point_type(geo_metadata: &GeoMetadata, dimension: Dimension) -> PointType { + PointType::new(dimension.into(), geoarrow_metadata(geo_metadata)) +} + +impl ArrowExportVTable for Point { + fn arrow_ext_id(&self) -> Id { + *ARROW_POINT + } + + fn vortex_id(&self) -> Id { + self.id() + } + + fn to_arrow_field( + &self, + name: &str, + dtype: &DType, + session: &ArrowSession, + ) -> VortexResult> { + let ext_type = dtype.as_extension(); + let geo_metadata = ext_type.metadata::(); + let dimension = coordinate_dimension(ext_type.storage_dtype())?; + + let mut field = session.to_arrow_field(name, ext_type.storage_dtype())?; + field.try_with_extension_type(point_type(geo_metadata, dimension))?; + + Ok(Some(field)) + } + + fn execute_arrow( + &self, + array: ArrayRef, + target: &Field, + ctx: &mut ExecutionCtx, + ) -> VortexResult { + let is_point = array + .dtype() + .as_extension_opt() + .map(|ext| ext.is::()) + .unwrap_or(false); + if !is_point { + return Ok(ArrowExport::Unsupported(array)); + } + + let Ok(point_meta) = target.try_extension_type::() else { + return Ok(ArrowExport::Unsupported(array)); + }; + if point_meta.coord_type() != CoordType::Separated { + return Ok(ArrowExport::Unsupported(array)); + } + + let executed = array.execute::(ctx)?; + let storage = executed.storage_array().clone(); + + let storage_field = Field::new( + String::new(), + target.data_type().clone(), + target.is_nullable(), + ); + let session = ctx.session().clone(); + let arrow_storage = session + .arrow() + .execute_arrow(storage, Some(&storage_field), ctx)?; + + // Round-trip through the GeoArrow point array type: this validates that the storage is + // the separated-coordinate struct layout expected for a `PointType` extension field. + let points = PointArray::try_from((arrow_storage.as_ref(), point_meta)) + .map_err(|e| vortex_err!("failed to construct PointArray: {e}"))?; + + Ok(ArrowExport::Exported(points.into_arrow())) + } +} + +impl ArrowImportVTable for Point { + fn arrow_ext_id(&self) -> Id { + *ARROW_POINT + } + + fn from_arrow_field(&self, field: &Field) -> VortexResult> { + let Ok(point_meta) = field.try_extension_type::() else { + return Ok(None); + }; + if point_meta.coord_type() != CoordType::Separated { + vortex_bail!( + "geoarrow.point with interleaved coordinates is not supported; \ + re-encode with separated (struct) coordinates" + ); + } + + let storage_dtype = + coordinate_storage_dtype(point_meta.dimension().into(), field.is_nullable().into()); + Ok(Some(DType::Extension( + ExtDType::try_with_vtable( + Point, + geo_metadata_from_arrow(point_meta.metadata()), + storage_dtype, + )? + .erased(), + ))) + } + + fn from_arrow_array( + &self, + array: ArrowArrayRef, + field: &Field, + dtype: &DType, + ) -> VortexResult { + let Some(ext_dtype) = dtype.as_extension_opt() else { + return Ok(ArrowImport::Unsupported(array)); + }; + if !ext_dtype.is::() + || field.try_extension_type::().is_err() + || !matches!(array.data_type(), DataType::Struct(_)) + { + return Ok(ArrowImport::Unsupported(array)); + } + + let storage = ArrayRef::from_arrow(array.as_ref(), field.is_nullable())?; + Ok(ArrowImport::Imported( + ExtensionArray::try_new(ext_dtype.clone(), storage)?.into_array(), + )) + } +} + #[cfg(test)] mod tests { use vortex_array::IntoArray; use vortex_array::VortexSessionExecute; - use vortex_array::arrays::ExtensionArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::arrays::StructArray; use vortex_array::dtype::DType; - use vortex_array::dtype::FieldNames; use vortex_array::dtype::Nullability; use vortex_array::dtype::PType; - use vortex_array::dtype::StructFields; use vortex_array::dtype::extension::ExtDType; use vortex_array::session::ArraySession; use vortex_error::VortexResult; @@ -76,8 +230,9 @@ mod tests { use crate::extension::GeoMetadata; use crate::extension::coordinate::Coordinate; use crate::extension::coordinate::Dimension; - use crate::extension::coordinate::coordinate_dimension; use crate::extension::coordinate::coordinate_from_scalar; + use crate::extension::coordinate::coordinate_storage_dtype; + use crate::test_harness::point_column; fn geo_meta() -> GeoMetadata { GeoMetadata { @@ -85,32 +240,16 @@ mod tests { } } - /// A coordinate storage dtype with the given field names, non-nullable `f64` per field. - fn coordinate_dtype(names: &[&'static str]) -> DType { - let fields = std::iter::repeat_n( - DType::Primitive(PType::F64, Nullability::NonNullable), - names.len(), - ) - .collect::>(); - DType::Struct( - StructFields::new(FieldNames::from(names), fields), - Nullability::NonNullable, - ) - } - - /// `Point` accepts every GeoArrow dimension; the canonical field names round-trip to their - /// dimension, so a `z`/`m` swap or a mislabel would be caught. + /// `Point` accepts the canonical coordinate storage of every GeoArrow dimension. #[test] fn point_validates_every_dimension() -> VortexResult<()> { - let cases = [ - (Dimension::Xy, ["x", "y"].as_slice()), - (Dimension::Xyz, ["x", "y", "z"].as_slice()), - (Dimension::Xym, ["x", "y", "m"].as_slice()), - (Dimension::Xyzm, ["x", "y", "z", "m"].as_slice()), - ]; - for (dim, names) in cases { - let storage = coordinate_dtype(names); - assert_eq!(coordinate_dimension(&storage)?, dim); + for dim in [ + Dimension::Xy, + Dimension::Xyz, + Dimension::Xym, + Dimension::Xyzm, + ] { + let storage = coordinate_storage_dtype(dim, Nullability::NonNullable); ExtDType::::try_new(geo_meta(), storage)?; } Ok(()) @@ -138,19 +277,7 @@ mod tests { let session = VortexSession::empty().with::(); let mut ctx = session.create_execution_ctx(); - let storage = StructArray::from_fields(&[ - ( - "x", - PrimitiveArray::from_iter(vec![1.0f64, -111.7610]).into_array(), - ), - ( - "y", - PrimitiveArray::from_iter(vec![2.0f64, 34.8697]).into_array(), - ), - ])? - .into_array(); - let dtype = ExtDType::::try_new(geo_meta(), storage.dtype().clone())?; - let points = ExtensionArray::new(dtype.erased(), storage).into_array(); + let points = point_column(vec![1.0, -111.7610], vec![2.0, 34.8697])?; assert_eq!( coordinate_from_scalar(&points.execute_scalar(0, &mut ctx)?)?, diff --git a/vortex-geo/src/extension/wkb.rs b/vortex-geo/src/extension/wkb.rs index 172da41b93b..ddcfd9d53b6 100644 --- a/vortex-geo/src/extension/wkb.rs +++ b/vortex-geo/src/extension/wkb.rs @@ -12,8 +12,6 @@ use arrow_schema::extension::ExtensionType; use geoarrow::array::GenericWkbArray; use geoarrow::array::IntoArrow; use geoarrow::array::WkbViewArray; -use geoarrow::datatypes::Crs; -use geoarrow::datatypes::Metadata; use geoarrow::datatypes::WkbType; use prost::Message; use vortex_array::ArrayRef; @@ -43,6 +41,8 @@ use vortex_session::registry::Id; use wkb::reader::GeometryType; use crate::extension::GeoMetadata; +use crate::extension::geo_metadata_from_arrow; +use crate::extension::geoarrow_metadata; /// A typed handle to an [`ExtensionArray`] that contains WKB-encoded data. /// @@ -292,26 +292,9 @@ impl ArrowImportVTable for WellKnownBinary { } fn wkb_type(geo_metadata: &GeoMetadata) -> WkbType { - let metadata = Metadata::new( - geo_metadata - .crs - .as_ref() - .map(|crs| Crs::from_unknown_crs_type(crs.to_string())) - .unwrap_or_default(), - None, - ); - WkbType::new(Arc::new(metadata)) + WkbType::new(geoarrow_metadata(geo_metadata)) } fn geo_metadata(wkb_type: &WkbType) -> GeoMetadata { - let crs = wkb_type.metadata().crs().crs_value().map(|value| { - // `Crs::from_unknown_crs_type` stores the user's string verbatim as a JSON string - // value, so prefer the raw string when available to round-trip cleanly. For other - // CRS encodings (PROJJSON object, etc.), fall back to the JSON-encoded form. - value - .as_str() - .map(str::to_string) - .unwrap_or_else(|| value.to_string()) - }); - GeoMetadata { crs } + geo_metadata_from_arrow(wkb_type.metadata()) } diff --git a/vortex-geo/src/lib.rs b/vortex-geo/src/lib.rs index 9d0cde26f9e..d47b0976bc6 100644 --- a/vortex-geo/src/lib.rs +++ b/vortex-geo/src/lib.rs @@ -14,6 +14,11 @@ use crate::scalar_fn::distance::GeoDistance; pub mod extension; pub mod scalar_fn; +#[cfg(test)] +mod test_harness; +#[cfg(test)] +mod tests; + /// Set up a session with support for geospatial extension types, encodings and layouts. pub fn initialize(session: &VortexSession) { // Register the geospatial extension types. @@ -21,311 +26,9 @@ pub fn initialize(session: &VortexSession) { session.arrow().register_exporter(Arc::new(WellKnownBinary)); session.arrow().register_importer(Arc::new(WellKnownBinary)); session.dtypes().register(Point); + session.arrow().register_exporter(Arc::new(Point)); + session.arrow().register_importer(Arc::new(Point)); // Register the geometry scalar functions. session.scalar_fns().register(GeoDistance); } - -#[cfg(test)] -mod tests { - use std::sync::Arc; - use std::sync::LazyLock; - - use arrow_array::Array as _; - use arrow_array::ArrayRef as ArrowArrayRef; - use arrow_array::BinaryArray; - use arrow_array::BinaryViewArray; - use arrow_array::LargeBinaryArray; - use arrow_array::cast::AsArray; - use arrow_schema::DataType; - use arrow_schema::Field; - use arrow_schema::extension::ExtensionType as _; - use geo_traits::to_geo::ToGeoGeometry; - use geo_types::Coord; - use geo_types::Geometry; - use geo_types::LineString; - use geo_types::Polygon; - use geoarrow::datatypes::Crs; - use geoarrow::datatypes::Metadata; - use geoarrow::datatypes::WkbType; - use vortex_array::IntoArray; - use vortex_array::VortexSessionExecute; - use vortex_array::arrays::ExtensionArray; - use vortex_array::arrays::varbin::builder::VarBinBuilder; - use vortex_array::arrow::ArrowSessionExt; - use vortex_array::dtype::DType; - use vortex_array::dtype::Nullability; - use vortex_array::dtype::extension::ExtDType; - use vortex_array::dtype::extension::ExtVTable; - use vortex_array::session::ArraySession; - use vortex_error::VortexResult; - use vortex_error::vortex_err; - use vortex_session::VortexSession; - use wkb::writer::WriteOptions; - - use crate::extension::GeoMetadata; - use crate::extension::WellKnownBinary; - - static SESSION: LazyLock = LazyLock::new(|| { - let session = VortexSession::empty().with::(); - crate::initialize(&session); - session - }); - - #[test] - fn test_array() -> VortexResult<()> { - let mut execution_ctx = SESSION.create_execution_ctx(); - - let mut buf = Vec::new(); - - let mut builder = VarBinBuilder::::with_capacity(3); - - let polygon = Geometry::Polygon(Polygon::new( - LineString::new(vec![ - Coord::zero(), - Coord { x: 100.0, y: 0.0 }, - Coord { x: 100.0, y: 100.0 }, - Coord { x: 0.0, y: 100.0 }, - Coord::zero(), - ]), - vec![], - )); - - // We should always prefer to write little-endian, which is the default option. - wkb::writer::write_geometry(&mut buf, &polygon, &WriteOptions::default()) - .map_err(|e| vortex_err!("writing WKB failed: {e}"))?; - - // Push same polygon 3 times - builder.append_value(&buf); - builder.append_value(&buf); - builder.append_value(&buf); - - let dtype = ExtDType::::try_new( - GeoMetadata { - crs: Some("EPSG:4326".to_string()), - }, - DType::Binary(Nullability::NonNullable), - )?; - - let array = builder.finish(DType::Binary(Nullability::NonNullable)); - let array = ExtensionArray::new(dtype.clone().erased(), array.into_array()).into_array(); - - for idx in 0..3 { - let geom = array.execute_scalar(idx, &mut execution_ctx)?; - let wkb = WellKnownBinary::unpack_native(&dtype, geom.value().unwrap())?; - - assert_eq!(wkb.to_geometry(), polygon); - } - - Ok(()) - } - - fn wkb_extension_array() -> VortexResult<(Vec, vortex_array::ArrayRef)> { - let polygon = Geometry::Polygon(Polygon::new( - LineString::new(vec![ - Coord::zero(), - Coord { x: 100.0, y: 0.0 }, - Coord { x: 100.0, y: 100.0 }, - Coord { x: 0.0, y: 100.0 }, - Coord::zero(), - ]), - vec![], - )); - let mut buf = Vec::new(); - wkb::writer::write_geometry(&mut buf, &polygon, &WriteOptions::default()) - .map_err(|e| vortex_err!("writing WKB failed: {e}"))?; - - let mut builder = VarBinBuilder::::with_capacity(3); - builder.append_value(&buf); - builder.append_value(&buf); - builder.append_value(&buf); - - let dtype = ExtDType::::try_new( - GeoMetadata { - crs: Some("EPSG:4326".to_string()), - }, - DType::Binary(Nullability::NonNullable), - )?; - let storage = builder.finish(DType::Binary(Nullability::NonNullable)); - let array = ExtensionArray::new(dtype.erased(), storage.into_array()).into_array(); - Ok((buf, array)) - } - - #[test] - fn export_arrow_field_carries_wkb_extension() -> VortexResult<()> { - let (_, array) = wkb_extension_array()?; - let field = SESSION.arrow().to_arrow_field("geom", array.dtype())?; - assert_eq!(field.extension_type_name(), Some(WkbType::NAME)); - // Vortex's canonical mapping of `DType::Binary` is Arrow's `BinaryView`. - assert_eq!(field.data_type(), &DataType::BinaryView); - Ok(()) - } - - #[test] - fn execute_arrow_exports_wkb_to_binary() -> VortexResult<()> { - let mut ctx = SESSION.create_execution_ctx(); - let (wkb_bytes, array) = wkb_extension_array()?; - - let field = Field::new("geom", DataType::Binary, false) - .with_extension_type(WkbType::new(Default::default())); - let exported = SESSION - .arrow() - .execute_arrow(array, Some(&field), &mut ctx)?; - - assert_eq!(exported.data_type(), &DataType::Binary); - let binary = exported.as_binary::(); - assert_eq!(binary.len(), 3); - for idx in 0..3 { - assert_eq!(binary.value(idx), wkb_bytes.as_slice()); - } - Ok(()) - } - - #[test] - fn execute_arrow_exports_wkb_to_large_binary() -> VortexResult<()> { - let mut ctx = SESSION.create_execution_ctx(); - let (wkb_bytes, array) = wkb_extension_array()?; - - let field = Field::new("geom", DataType::LargeBinary, false) - .with_extension_type(WkbType::new(Default::default())); - let exported = SESSION - .arrow() - .execute_arrow(array, Some(&field), &mut ctx)?; - - assert_eq!(exported.data_type(), &DataType::LargeBinary); - let binary = exported.as_binary::(); - assert_eq!(binary.len(), 3); - for idx in 0..3 { - assert_eq!(binary.value(idx), wkb_bytes.as_slice()); - } - Ok(()) - } - - #[test] - fn execute_arrow_exports_wkb_to_binary_view() -> VortexResult<()> { - let mut ctx = SESSION.create_execution_ctx(); - let (wkb_bytes, array) = wkb_extension_array()?; - - let field = Field::new("geom", DataType::BinaryView, false) - .with_extension_type(WkbType::new(Default::default())); - let exported = SESSION - .arrow() - .execute_arrow(array, Some(&field), &mut ctx)?; - - assert_eq!(exported.data_type(), &DataType::BinaryView); - let binary = exported.as_binary_view(); - assert_eq!(binary.len(), 3); - for idx in 0..3 { - assert_eq!(binary.value(idx), wkb_bytes.as_slice()); - } - Ok(()) - } - - fn wkb_field(name: &str, data_type: DataType, nullable: bool, crs: Option<&str>) -> Field { - let crs = crs - .map(|crs| Crs::from_unknown_crs_type(crs.to_string())) - .unwrap_or_default(); - let wkb_type = WkbType::new(Arc::new(Metadata::new(crs, None))); - Field::new(name, data_type, nullable).with_extension_type(wkb_type) - } - - fn assert_imported_wkb_dtype(dtype: &DType, expected_crs: Option<&str>, nullable: bool) { - let DType::Extension(ext) = dtype else { - panic!("expected Extension dtype, got {dtype}"); - }; - assert!(ext.is::()); - assert_eq!(ext.storage_dtype(), &DType::Binary(nullable.into())); - let geo = ext.metadata::(); - assert_eq!(geo.crs.as_deref(), expected_crs); - } - - #[test] - fn import_arrow_field_recovers_wkb_extension() -> VortexResult<()> { - let field = wkb_field("geom", DataType::Binary, false, Some("EPSG:4326")); - let dtype = SESSION.arrow().from_arrow_field(&field)?; - assert_imported_wkb_dtype(&dtype, Some("EPSG:4326"), false); - Ok(()) - } - - #[test] - fn import_arrow_field_without_crs() -> VortexResult<()> { - let field = wkb_field("geom", DataType::BinaryView, true, None); - let dtype = SESSION.arrow().from_arrow_field(&field)?; - assert_imported_wkb_dtype(&dtype, None, true); - Ok(()) - } - - #[test] - fn import_arrow_array_from_binary() -> VortexResult<()> { - let mut ctx = SESSION.create_execution_ctx(); - let (wkb_bytes, _) = wkb_extension_array()?; - let arrow: ArrowArrayRef = Arc::new(BinaryArray::from_iter_values([ - wkb_bytes.as_slice(), - wkb_bytes.as_slice(), - wkb_bytes.as_slice(), - ])); - let field = wkb_field("geom", DataType::Binary, false, Some("EPSG:4326")); - - let imported = SESSION.arrow().from_arrow_array(arrow, &field)?; - assert_imported_wkb_dtype(imported.dtype(), Some("EPSG:4326"), false); - - for idx in 0..3 { - let geom = imported.execute_scalar(idx, &mut ctx)?; - assert_eq!(geom.value().unwrap().as_binary().as_slice(), wkb_bytes); - } - Ok(()) - } - - #[test] - fn import_arrow_array_from_large_binary() -> VortexResult<()> { - let (wkb_bytes, _) = wkb_extension_array()?; - let arrow: ArrowArrayRef = Arc::new(LargeBinaryArray::from_iter_values([ - wkb_bytes.as_slice(), - wkb_bytes.as_slice(), - ])); - let field = wkb_field("geom", DataType::LargeBinary, false, Some("EPSG:4326")); - - let imported = SESSION.arrow().from_arrow_array(arrow, &field)?; - assert_imported_wkb_dtype(imported.dtype(), Some("EPSG:4326"), false); - assert_eq!(imported.len(), 2); - Ok(()) - } - - #[test] - fn import_arrow_array_from_binary_view() -> VortexResult<()> { - let (wkb_bytes, _) = wkb_extension_array()?; - let arrow: ArrowArrayRef = Arc::new(BinaryViewArray::from_iter_values([ - wkb_bytes.as_slice(), - wkb_bytes.as_slice(), - wkb_bytes.as_slice(), - wkb_bytes.as_slice(), - ])); - let field = wkb_field("geom", DataType::BinaryView, false, None); - - let imported = SESSION.arrow().from_arrow_array(arrow, &field)?; - assert_imported_wkb_dtype(imported.dtype(), None, false); - assert_eq!(imported.len(), 4); - Ok(()) - } - - #[test] - fn import_arrow_array_roundtrips_through_export() -> VortexResult<()> { - let mut ctx = SESSION.create_execution_ctx(); - let (wkb_bytes, original) = wkb_extension_array()?; - - let field = Field::new("geom", DataType::BinaryView, false) - .with_extension_type(WkbType::new(Default::default())); - let exported = SESSION - .arrow() - .execute_arrow(original, Some(&field), &mut ctx)?; - let arrow_field = wkb_field("geom", DataType::BinaryView, false, Some("EPSG:4326")); - let reimported = SESSION.arrow().from_arrow_array(exported, &arrow_field)?; - - assert_imported_wkb_dtype(reimported.dtype(), Some("EPSG:4326"), false); - for idx in 0..3 { - let geom = reimported.execute_scalar(idx, &mut ctx)?; - assert_eq!(geom.value().unwrap().as_binary().as_slice(), wkb_bytes); - } - Ok(()) - } -} diff --git a/vortex-geo/src/scalar_fn/distance.rs b/vortex-geo/src/scalar_fn/distance.rs index e621c4bbad1..c65f1ab78dc 100644 --- a/vortex-geo/src/scalar_fn/distance.rs +++ b/vortex-geo/src/scalar_fn/distance.rs @@ -153,32 +153,13 @@ mod tests { use vortex_array::IntoArray; use vortex_array::VortexSessionExecute; use vortex_array::arrays::ConstantArray; - use vortex_array::arrays::ExtensionArray; - use vortex_array::arrays::PrimitiveArray; - use vortex_array::arrays::StructArray; - use vortex_array::dtype::extension::ExtDType; use vortex_array::session::ArraySession; use vortex_error::VortexResult; use vortex_session::VortexSession; use super::GeoDistance; use super::euclidean_distance; - use crate::extension::GeoMetadata; - use crate::extension::Point; - - /// A `Point` column (CRS `EPSG:4326`) over the given x/y coordinates. - fn point_column(xs: Vec, ys: Vec) -> VortexResult { - let storage = StructArray::from_fields(&[ - ("x", PrimitiveArray::from_iter(xs).into_array()), - ("y", PrimitiveArray::from_iter(ys).into_array()), - ])? - .into_array(); - let metadata = GeoMetadata { - crs: Some("EPSG:4326".to_string()), - }; - let dtype = ExtDType::::try_new(metadata, storage.dtype().clone())?; - Ok(ExtensionArray::new(dtype.erased(), storage).into_array()) - } + use crate::test_harness::point_column; /// A constant `Point` column of length `len`, every row at `(x, y)`. fn point_constant( diff --git a/vortex-geo/src/test_harness.rs b/vortex-geo/src/test_harness.rs new file mode 100644 index 00000000000..9f066fb4d14 --- /dev/null +++ b/vortex-geo/src/test_harness.rs @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Shared test helpers for the geospatial extension types. + +use vortex_array::ArrayRef; +use vortex_array::IntoArray; +use vortex_array::arrays::ExtensionArray; +use vortex_array::arrays::PrimitiveArray; +use vortex_array::arrays::StructArray; +use vortex_array::dtype::extension::ExtDType; +use vortex_error::VortexResult; + +use crate::extension::GeoMetadata; +use crate::extension::Point; + +/// A `Point` column (CRS `EPSG:4326`) over the given x/y coordinates. +pub(crate) fn point_column(xs: Vec, ys: Vec) -> VortexResult { + let storage = StructArray::from_fields(&[ + ("x", PrimitiveArray::from_iter(xs).into_array()), + ("y", PrimitiveArray::from_iter(ys).into_array()), + ])? + .into_array(); + let metadata = GeoMetadata { + crs: Some("EPSG:4326".to_string()), + }; + let dtype = ExtDType::::try_new(metadata, storage.dtype().clone())?; + Ok(ExtensionArray::new(dtype.erased(), storage).into_array()) +} diff --git a/vortex-geo/src/tests/mod.rs b/vortex-geo/src/tests/mod.rs new file mode 100644 index 00000000000..86a89a60b5b --- /dev/null +++ b/vortex-geo/src/tests/mod.rs @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Arrow interop tests for the geospatial extension types, exercising the session wiring set up +//! by [`crate::initialize`]. + +mod point; +mod wkb; + +use std::sync::LazyLock; + +use vortex_array::session::ArraySession; +use vortex_session::VortexSession; + +/// A session with the geospatial types and functions registered. +static SESSION: LazyLock = LazyLock::new(|| { + let session = VortexSession::empty().with::(); + crate::initialize(&session); + session +}); diff --git a/vortex-geo/src/tests/point.rs b/vortex-geo/src/tests/point.rs new file mode 100644 index 00000000000..074ef97f0d3 --- /dev/null +++ b/vortex-geo/src/tests/point.rs @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Arrow interop for the `vortex.geo.point` extension type (`geoarrow.point`). + +use std::sync::Arc; + +use arrow_array::ArrayRef as ArrowArrayRef; +use arrow_array::Float64Array; +use arrow_array::StructArray as ArrowStructArray; +use arrow_array::cast::AsArray; +use arrow_array::types::Float64Type; +use arrow_schema::DataType; +use arrow_schema::Field; +use arrow_schema::Fields; +use arrow_schema::extension::ExtensionType as _; +use geoarrow::datatypes::CoordType; +use geoarrow::datatypes::Crs; +use geoarrow::datatypes::Dimension as GeoArrowDimension; +use geoarrow::datatypes::Metadata; +use geoarrow::datatypes::PointType; +use vortex_array::VortexSessionExecute; +use vortex_array::arrow::ArrowSessionExt; +use vortex_array::dtype::DType; +use vortex_array::dtype::Nullability; +use vortex_error::VortexResult; +use vortex_error::vortex_err; + +use super::SESSION; +use crate::extension::Point; +use crate::extension::coordinate::Coordinate; +use crate::extension::coordinate::coordinate_from_scalar; +use crate::test_harness::point_column; + +/// A `geoarrow.point` Arrow field with separated (struct) XY coordinates. +fn point_field(name: &str, nullable: bool, crs: Option<&str>) -> Field { + let crs = crs + .map(|crs| Crs::from_unknown_crs_type(crs.to_string())) + .unwrap_or_default(); + let metadata = Arc::new(Metadata::new(crs, None)); + PointType::new(GeoArrowDimension::XY, metadata).to_field(name, nullable) +} + +/// An Arrow `Struct` point array with non-nullable `Float64` children. +fn arrow_point_struct(xs: Vec, ys: Vec) -> ArrowStructArray { + let fields: Fields = vec![ + Field::new("x", DataType::Float64, false), + Field::new("y", DataType::Float64, false), + ] + .into(); + ArrowStructArray::new( + fields, + vec![ + Arc::new(Float64Array::from(xs)) as ArrowArrayRef, + Arc::new(Float64Array::from(ys)), + ], + None, + ) +} + +/// The exported Arrow field carries the `geoarrow.point` extension over the separated +/// `Struct` coordinate layout. +#[test] +fn export_field_carries_extension() -> VortexResult<()> { + let array = point_column(vec![1.0], vec![2.0])?; + let field = SESSION.arrow().to_arrow_field("loc", array.dtype())?; + + assert_eq!(field.extension_type_name(), Some(PointType::NAME)); + let DataType::Struct(fields) = field.data_type() else { + panic!("expected Struct, got {}", field.data_type()); + }; + assert_eq!(fields.len(), 2); + assert_eq!(fields[0].name(), "x"); + assert_eq!(fields[0].data_type(), &DataType::Float64); + assert_eq!(fields[1].name(), "y"); + assert_eq!(fields[1].data_type(), &DataType::Float64); + Ok(()) +} + +/// Export materializes the point column as an Arrow struct with the original ordinates. +#[test] +fn exports_to_struct() -> VortexResult<()> { + let mut ctx = SESSION.create_execution_ctx(); + let array = point_column(vec![1.0, 3.0], vec![2.0, 4.0])?; + + let target = point_field("loc", false, Some("EPSG:4326")); + let exported = SESSION + .arrow() + .execute_arrow(array, Some(&target), &mut ctx)?; + + let points = exported.as_struct(); + let ordinates = |name: &str| -> VortexResult> { + Ok(points + .column_by_name(name) + .ok_or_else(|| vortex_err!("missing {name} column"))? + .as_primitive::() + .values() + .to_vec()) + }; + assert_eq!(ordinates("x")?, vec![1.0, 3.0]); + assert_eq!(ordinates("y")?, vec![2.0, 4.0]); + Ok(()) +} + +/// An imported `geoarrow.point` field maps to the Point extension dtype, recovering the +/// CRS, coordinate field names, and nullability. +#[test] +fn import_field_recovers_extension() -> VortexResult<()> { + let field = point_field("loc", true, Some("EPSG:4326")); + let dtype = SESSION.arrow().from_arrow_field(&field)?; + + let DType::Extension(ext) = &dtype else { + panic!("expected Extension dtype, got {dtype}"); + }; + assert!(ext.is::()); + assert_eq!(ext.metadata::().crs.as_deref(), Some("EPSG:4326")); + + let DType::Struct(fields, nullability) = ext.storage_dtype() else { + panic!("expected Struct storage, got {}", ext.storage_dtype()); + }; + assert_eq!(*nullability, Nullability::Nullable); + let names: Vec<&str> = fields.names().iter().map(|n| n.as_ref()).collect(); + assert_eq!(names, vec!["x", "y"]); + Ok(()) +} + +/// A field with interleaved (`FixedSizeList`) coordinates fails to import. +#[test] +fn import_interleaved_field_fails() { + let point_type = PointType::new(GeoArrowDimension::XY, Default::default()) + .with_coord_type(CoordType::Interleaved); + let field = point_type.to_field("loc", false); + assert!(SESSION.arrow().from_arrow_field(&field).is_err()); +} + +/// Import wraps the Arrow struct's coordinate buffers into a Point column. +#[test] +fn imports_from_struct() -> VortexResult<()> { + let mut ctx = SESSION.create_execution_ctx(); + let arrow: ArrowArrayRef = + Arc::new(arrow_point_struct(vec![1.0, -111.7610], vec![2.0, 34.8697])); + let field = point_field("loc", false, Some("EPSG:4326")); + + let imported = SESSION.arrow().from_arrow_array(arrow, &field)?; + assert!( + imported + .dtype() + .as_extension_opt() + .map(|ext| ext.is::()) + .unwrap_or(false) + ); + + assert_eq!( + coordinate_from_scalar(&imported.execute_scalar(0, &mut ctx)?)?, + Coordinate::xy(1.0, 2.0) + ); + assert_eq!( + coordinate_from_scalar(&imported.execute_scalar(1, &mut ctx)?)?, + Coordinate::xy(-111.7610, 34.8697) + ); + Ok(()) +} + +/// A point column exported to Arrow and imported back is unchanged, including the CRS. +#[test] +fn roundtrips_through_arrow() -> VortexResult<()> { + let mut ctx = SESSION.create_execution_ctx(); + let original = point_column(vec![0.0, 3.0], vec![4.0, 0.0])?; + + let target = point_field("loc", false, Some("EPSG:4326")); + let exported = SESSION + .arrow() + .execute_arrow(original, Some(&target), &mut ctx)?; + let reimported = SESSION.arrow().from_arrow_array(exported, &target)?; + + let ext = reimported + .dtype() + .as_extension_opt() + .ok_or_else(|| vortex_err!("expected Extension dtype"))?; + assert_eq!(ext.metadata::().crs.as_deref(), Some("EPSG:4326")); + + assert_eq!( + coordinate_from_scalar(&reimported.execute_scalar(0, &mut ctx)?)?, + Coordinate::xy(0.0, 4.0) + ); + assert_eq!( + coordinate_from_scalar(&reimported.execute_scalar(1, &mut ctx)?)?, + Coordinate::xy(3.0, 0.0) + ); + Ok(()) +} diff --git a/vortex-geo/src/tests/wkb.rs b/vortex-geo/src/tests/wkb.rs new file mode 100644 index 00000000000..d031f8a34f8 --- /dev/null +++ b/vortex-geo/src/tests/wkb.rs @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Arrow interop for the `vortex.geo.wkb` extension type (`geoarrow.wkb`). + +use std::sync::Arc; + +use arrow_array::Array as _; +use arrow_array::ArrayRef as ArrowArrayRef; +use arrow_array::BinaryArray; +use arrow_array::BinaryViewArray; +use arrow_array::LargeBinaryArray; +use arrow_array::cast::AsArray; +use arrow_schema::DataType; +use arrow_schema::Field; +use arrow_schema::extension::ExtensionType as _; +use geo_traits::to_geo::ToGeoGeometry; +use geo_types::Coord; +use geo_types::Geometry; +use geo_types::LineString; +use geo_types::Polygon; +use geoarrow::datatypes::Crs; +use geoarrow::datatypes::Metadata; +use geoarrow::datatypes::WkbType; +use vortex_array::IntoArray; +use vortex_array::VortexSessionExecute; +use vortex_array::arrays::ExtensionArray; +use vortex_array::arrays::varbin::builder::VarBinBuilder; +use vortex_array::arrow::ArrowSessionExt; +use vortex_array::dtype::DType; +use vortex_array::dtype::Nullability; +use vortex_array::dtype::extension::ExtDType; +use vortex_array::dtype::extension::ExtVTable; +use vortex_error::VortexResult; +use vortex_error::vortex_err; +use wkb::writer::WriteOptions; + +use super::SESSION; +use crate::extension::GeoMetadata; +use crate::extension::WellKnownBinary; + +/// The polygon geometry encoded by these tests. +fn test_polygon() -> Geometry { + Geometry::Polygon(Polygon::new( + LineString::new(vec![ + Coord::zero(), + Coord { x: 100.0, y: 0.0 }, + Coord { x: 100.0, y: 100.0 }, + Coord { x: 0.0, y: 100.0 }, + Coord::zero(), + ]), + vec![], + )) +} + +/// A WKB column (CRS `EPSG:4326`) holding [`test_polygon`] three times, along with the +/// polygon's WKB bytes. +fn wkb_extension_array() -> VortexResult<(Vec, vortex_array::ArrayRef)> { + let mut buf = Vec::new(); + // We should always prefer to write little-endian, which is the default option. + wkb::writer::write_geometry(&mut buf, &test_polygon(), &WriteOptions::default()) + .map_err(|e| vortex_err!("writing WKB failed: {e}"))?; + + let mut builder = VarBinBuilder::::with_capacity(3); + builder.append_value(&buf); + builder.append_value(&buf); + builder.append_value(&buf); + + let dtype = ExtDType::::try_new( + GeoMetadata { + crs: Some("EPSG:4326".to_string()), + }, + DType::Binary(Nullability::NonNullable), + )?; + let storage = builder.finish(DType::Binary(Nullability::NonNullable)); + let array = ExtensionArray::new(dtype.erased(), storage.into_array()).into_array(); + Ok((buf, array)) +} + +/// A `geoarrow.wkb` Arrow field over the given binary data type. +fn wkb_field(name: &str, data_type: DataType, nullable: bool, crs: Option<&str>) -> Field { + let crs = crs + .map(|crs| Crs::from_unknown_crs_type(crs.to_string())) + .unwrap_or_default(); + let wkb_type = WkbType::new(Arc::new(Metadata::new(crs, None))); + Field::new(name, data_type, nullable).with_extension_type(wkb_type) +} + +fn assert_imported_wkb_dtype(dtype: &DType, expected_crs: Option<&str>, nullable: bool) { + let DType::Extension(ext) = dtype else { + panic!("expected Extension dtype, got {dtype}"); + }; + assert!(ext.is::()); + assert_eq!(ext.storage_dtype(), &DType::Binary(nullable.into())); + let geo = ext.metadata::(); + assert_eq!(geo.crs.as_deref(), expected_crs); +} + +/// WKB scalars unpack back to the geometry they encode. +#[test] +fn scalar_unpacks_to_geometry() -> VortexResult<()> { + let mut ctx = SESSION.create_execution_ctx(); + let (_, array) = wkb_extension_array()?; + + let dtype = ExtDType::::try_new( + GeoMetadata { + crs: Some("EPSG:4326".to_string()), + }, + DType::Binary(Nullability::NonNullable), + )?; + + for idx in 0..3 { + let geom = array.execute_scalar(idx, &mut ctx)?; + let wkb = WellKnownBinary::unpack_native(&dtype, geom.value().unwrap())?; + assert_eq!(wkb.to_geometry(), test_polygon()); + } + Ok(()) +} + +/// The exported Arrow field carries the `geoarrow.wkb` extension over Vortex's canonical +/// binary mapping (`BinaryView`). +#[test] +fn export_field_carries_extension() -> VortexResult<()> { + let (_, array) = wkb_extension_array()?; + let field = SESSION.arrow().to_arrow_field("geom", array.dtype())?; + assert_eq!(field.extension_type_name(), Some(WkbType::NAME)); + assert_eq!(field.data_type(), &DataType::BinaryView); + Ok(()) +} + +/// Export materializes the WKB bytes into the requested binary-family Arrow array. +#[test] +fn exports_to_binary() -> VortexResult<()> { + let mut ctx = SESSION.create_execution_ctx(); + let (wkb_bytes, array) = wkb_extension_array()?; + + let field = Field::new("geom", DataType::Binary, false) + .with_extension_type(WkbType::new(Default::default())); + let exported = SESSION + .arrow() + .execute_arrow(array, Some(&field), &mut ctx)?; + + assert_eq!(exported.data_type(), &DataType::Binary); + let binary = exported.as_binary::(); + assert_eq!(binary.len(), 3); + for idx in 0..3 { + assert_eq!(binary.value(idx), wkb_bytes.as_slice()); + } + Ok(()) +} + +/// Export materializes the WKB bytes into the requested binary-family Arrow array. +#[test] +fn exports_to_large_binary() -> VortexResult<()> { + let mut ctx = SESSION.create_execution_ctx(); + let (wkb_bytes, array) = wkb_extension_array()?; + + let field = Field::new("geom", DataType::LargeBinary, false) + .with_extension_type(WkbType::new(Default::default())); + let exported = SESSION + .arrow() + .execute_arrow(array, Some(&field), &mut ctx)?; + + assert_eq!(exported.data_type(), &DataType::LargeBinary); + let binary = exported.as_binary::(); + assert_eq!(binary.len(), 3); + for idx in 0..3 { + assert_eq!(binary.value(idx), wkb_bytes.as_slice()); + } + Ok(()) +} + +/// Export materializes the WKB bytes into the requested binary-family Arrow array. +#[test] +fn exports_to_binary_view() -> VortexResult<()> { + let mut ctx = SESSION.create_execution_ctx(); + let (wkb_bytes, array) = wkb_extension_array()?; + + let field = Field::new("geom", DataType::BinaryView, false) + .with_extension_type(WkbType::new(Default::default())); + let exported = SESSION + .arrow() + .execute_arrow(array, Some(&field), &mut ctx)?; + + assert_eq!(exported.data_type(), &DataType::BinaryView); + let binary = exported.as_binary_view(); + assert_eq!(binary.len(), 3); + for idx in 0..3 { + assert_eq!(binary.value(idx), wkb_bytes.as_slice()); + } + Ok(()) +} + +/// An imported `geoarrow.wkb` field maps to the WKB extension dtype, recovering the CRS. +#[test] +fn import_field_recovers_extension() -> VortexResult<()> { + let field = wkb_field("geom", DataType::Binary, false, Some("EPSG:4326")); + let dtype = SESSION.arrow().from_arrow_field(&field)?; + assert_imported_wkb_dtype(&dtype, Some("EPSG:4326"), false); + Ok(()) +} + +/// A `geoarrow.wkb` field without a CRS imports as an unreferenced geometry. +#[test] +fn import_field_without_crs() -> VortexResult<()> { + let field = wkb_field("geom", DataType::BinaryView, true, None); + let dtype = SESSION.arrow().from_arrow_field(&field)?; + assert_imported_wkb_dtype(&dtype, None, true); + Ok(()) +} + +/// Import wraps the binary-family Arrow array's WKB values unchanged. +#[test] +fn imports_from_binary() -> VortexResult<()> { + let mut ctx = SESSION.create_execution_ctx(); + let (wkb_bytes, _) = wkb_extension_array()?; + let arrow: ArrowArrayRef = Arc::new(BinaryArray::from_iter_values([ + wkb_bytes.as_slice(), + wkb_bytes.as_slice(), + wkb_bytes.as_slice(), + ])); + let field = wkb_field("geom", DataType::Binary, false, Some("EPSG:4326")); + + let imported = SESSION.arrow().from_arrow_array(arrow, &field)?; + assert_imported_wkb_dtype(imported.dtype(), Some("EPSG:4326"), false); + + for idx in 0..3 { + let geom = imported.execute_scalar(idx, &mut ctx)?; + assert_eq!(geom.value().unwrap().as_binary().as_slice(), wkb_bytes); + } + Ok(()) +} + +/// Import wraps the binary-family Arrow array's WKB values unchanged. +#[test] +fn imports_from_large_binary() -> VortexResult<()> { + let (wkb_bytes, _) = wkb_extension_array()?; + let arrow: ArrowArrayRef = Arc::new(LargeBinaryArray::from_iter_values([ + wkb_bytes.as_slice(), + wkb_bytes.as_slice(), + ])); + let field = wkb_field("geom", DataType::LargeBinary, false, Some("EPSG:4326")); + + let imported = SESSION.arrow().from_arrow_array(arrow, &field)?; + assert_imported_wkb_dtype(imported.dtype(), Some("EPSG:4326"), false); + assert_eq!(imported.len(), 2); + Ok(()) +} + +/// Import wraps the binary-family Arrow array's WKB values unchanged. +#[test] +fn imports_from_binary_view() -> VortexResult<()> { + let (wkb_bytes, _) = wkb_extension_array()?; + let arrow: ArrowArrayRef = Arc::new(BinaryViewArray::from_iter_values([ + wkb_bytes.as_slice(), + wkb_bytes.as_slice(), + wkb_bytes.as_slice(), + wkb_bytes.as_slice(), + ])); + let field = wkb_field("geom", DataType::BinaryView, false, None); + + let imported = SESSION.arrow().from_arrow_array(arrow, &field)?; + assert_imported_wkb_dtype(imported.dtype(), None, false); + assert_eq!(imported.len(), 4); + Ok(()) +} + +/// A WKB column exported to Arrow and imported back is unchanged, byte for byte. +#[test] +fn roundtrips_through_arrow() -> VortexResult<()> { + let mut ctx = SESSION.create_execution_ctx(); + let (wkb_bytes, original) = wkb_extension_array()?; + + let field = Field::new("geom", DataType::BinaryView, false) + .with_extension_type(WkbType::new(Default::default())); + let exported = SESSION + .arrow() + .execute_arrow(original, Some(&field), &mut ctx)?; + let arrow_field = wkb_field("geom", DataType::BinaryView, false, Some("EPSG:4326")); + let reimported = SESSION.arrow().from_arrow_array(exported, &arrow_field)?; + + assert_imported_wkb_dtype(reimported.dtype(), Some("EPSG:4326"), false); + for idx in 0..3 { + let geom = reimported.execute_scalar(idx, &mut ctx)?; + assert_eq!(geom.value().unwrap().as_binary().as_slice(), wkb_bytes); + } + Ok(()) +} From c34debeaff90a37e45ff3854869ba32f038d1f25 Mon Sep 17 00:00:00 2001 From: Nemo Yu Date: Fri, 12 Jun 2026 12:02:15 -0400 Subject: [PATCH 2/2] chore(vortex-geo): use rstest cases and vortex_ensure! Parameterize the table-driven tests with rstest cases, collapse the three binary-family WKB export tests into one, and replace the if-then-vortex_bail flows with vortex_ensure!. Signed-off-by: Nemo Yu --- Cargo.lock | 1 + vortex-geo/Cargo.toml | 1 + vortex-geo/src/extension/coordinate.rs | 44 ++++++++--------- vortex-geo/src/extension/point.rs | 33 ++++++------- vortex-geo/src/extension/wkb.rs | 9 ++-- vortex-geo/src/tests/wkb.rs | 67 +++++++------------------- 6 files changed, 60 insertions(+), 95 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fd66ecc5670..57be369d7c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9867,6 +9867,7 @@ dependencies = [ "geo-types", "geoarrow", "prost 0.14.4", + "rstest", "vortex-array", "vortex-error", "vortex-session", diff --git a/vortex-geo/Cargo.toml b/vortex-geo/Cargo.toml index 4e76e2eefcb..3b09cb77cf8 100644 --- a/vortex-geo/Cargo.toml +++ b/vortex-geo/Cargo.toml @@ -26,6 +26,7 @@ wkb = { workspace = true } [dev-dependencies] geo-traits = { workspace = true } geo-types = { workspace = true } +rstest = { workspace = true } [lints] workspace = true diff --git a/vortex-geo/src/extension/coordinate.rs b/vortex-geo/src/extension/coordinate.rs index 9d35a511c09..3e560bc4734 100644 --- a/vortex-geo/src/extension/coordinate.rs +++ b/vortex-geo/src/extension/coordinate.rs @@ -245,6 +245,7 @@ pub(crate) fn parse_storage( #[cfg(test)] mod tests { + use rstest::rstest; use vortex_array::IntoArray; use vortex_array::VortexSessionExecute; use vortex_array::arrays::ExtensionArray; @@ -267,38 +268,35 @@ mod tests { use crate::extension::Point; /// Each dimension round-trips through its field names and canonical storage dtype. - #[test] - fn storage_dtype_roundtrips_dimension() -> VortexResult<()> { - let cases = [ - (Dimension::Xy, ["x", "y"].as_slice()), - (Dimension::Xyz, ["x", "y", "z"].as_slice()), - (Dimension::Xym, ["x", "y", "m"].as_slice()), - (Dimension::Xyzm, ["x", "y", "z", "m"].as_slice()), - ]; - for (dim, names) in cases { - assert_eq!(dim.field_names(), names); - let dtype = coordinate_storage_dtype(dim, Nullability::NonNullable); - assert_eq!(coordinate_dimension(&dtype)?, dim); - } + #[rstest] + #[case::xy(Dimension::Xy, &["x", "y"])] + #[case::xyz(Dimension::Xyz, &["x", "y", "z"])] + #[case::xym(Dimension::Xym, &["x", "y", "m"])] + #[case::xyzm(Dimension::Xyzm, &["x", "y", "z", "m"])] + fn storage_dtype_roundtrips_dimension( + #[case] dim: Dimension, + #[case] names: &[&str], + ) -> VortexResult<()> { + assert_eq!(dim.field_names(), names); + let dtype = coordinate_storage_dtype(dim, Nullability::NonNullable); + assert_eq!(coordinate_dimension(&dtype)?, dim); Ok(()) } /// Display emits WKT, including `z`/`m` when present. - #[test] - fn display_is_wkt() { - let coordinate = |z, m| Coordinate { + #[rstest] + #[case::xy(None, None, "POINT(1 2)")] + #[case::xyz(Some(3.0), None, "POINT Z (1 2 3)")] + #[case::xym(None, Some(4.0), "POINT M (1 2 4)")] + #[case::xyzm(Some(3.0), Some(4.0), "POINT ZM (1 2 3 4)")] + fn display_is_wkt(#[case] z: Option, #[case] m: Option, #[case] expected: &str) { + let coordinate = Coordinate { x: 1.0, y: 2.0, z, m, }; - assert_eq!(coordinate(None, None).to_string(), "POINT(1 2)"); - assert_eq!(coordinate(Some(3.0), None).to_string(), "POINT Z (1 2 3)"); - assert_eq!(coordinate(None, Some(4.0)).to_string(), "POINT M (1 2 4)"); - assert_eq!( - coordinate(Some(3.0), Some(4.0)).to_string(), - "POINT ZM (1 2 3 4)" - ); + assert_eq!(coordinate.to_string(), expected); } /// [`parse_storage`] reads the coordinate fields unmasked, so a nullable point column must diff --git a/vortex-geo/src/extension/point.rs b/vortex-geo/src/extension/point.rs index 600082a1998..ecd544ca51e 100644 --- a/vortex-geo/src/extension/point.rs +++ b/vortex-geo/src/extension/point.rs @@ -34,7 +34,7 @@ use vortex_array::dtype::extension::ExtVTable; use vortex_array::scalar::Scalar; use vortex_array::scalar::ScalarValue; use vortex_error::VortexResult; -use vortex_error::vortex_bail; +use vortex_error::vortex_ensure; use vortex_error::vortex_err; use vortex_session::registry::CachedId; use vortex_session::registry::Id; @@ -170,12 +170,11 @@ impl ArrowImportVTable for Point { let Ok(point_meta) = field.try_extension_type::() else { return Ok(None); }; - if point_meta.coord_type() != CoordType::Separated { - vortex_bail!( - "geoarrow.point with interleaved coordinates is not supported; \ - re-encode with separated (struct) coordinates" - ); - } + vortex_ensure!( + point_meta.coord_type() == CoordType::Separated, + "geoarrow.point with interleaved coordinates is not supported; \ + re-encode with separated (struct) coordinates" + ); let storage_dtype = coordinate_storage_dtype(point_meta.dimension().into(), field.is_nullable().into()); @@ -214,6 +213,7 @@ impl ArrowImportVTable for Point { #[cfg(test)] mod tests { + use rstest::rstest; use vortex_array::IntoArray; use vortex_array::VortexSessionExecute; use vortex_array::arrays::PrimitiveArray; @@ -241,17 +241,14 @@ mod tests { } /// `Point` accepts the canonical coordinate storage of every GeoArrow dimension. - #[test] - fn point_validates_every_dimension() -> VortexResult<()> { - for dim in [ - Dimension::Xy, - Dimension::Xyz, - Dimension::Xym, - Dimension::Xyzm, - ] { - let storage = coordinate_storage_dtype(dim, Nullability::NonNullable); - ExtDType::::try_new(geo_meta(), storage)?; - } + #[rstest] + #[case::xy(Dimension::Xy)] + #[case::xyz(Dimension::Xyz)] + #[case::xym(Dimension::Xym)] + #[case::xyzm(Dimension::Xyzm)] + fn point_validates_every_dimension(#[case] dim: Dimension) -> VortexResult<()> { + let storage = coordinate_storage_dtype(dim, Nullability::NonNullable); + ExtDType::::try_new(geo_meta(), storage)?; Ok(()) } diff --git a/vortex-geo/src/extension/wkb.rs b/vortex-geo/src/extension/wkb.rs index ddcfd9d53b6..5491d8673d7 100644 --- a/vortex-geo/src/extension/wkb.rs +++ b/vortex-geo/src/extension/wkb.rs @@ -33,7 +33,6 @@ use vortex_array::dtype::extension::ExtVTable; use vortex_array::scalar::ScalarValue; use vortex_error::VortexError; use vortex_error::VortexResult; -use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_error::vortex_err; use vortex_session::registry::CachedId; @@ -71,9 +70,11 @@ impl TryFrom for WellKnownBinaryData { type Error = VortexError; fn try_from(ext: ExtensionArray) -> Result { - if !ext.ext_dtype().is::() { - vortex_bail!("array extension dtype {} is not a WKB", ext.ext_dtype()); - } + vortex_ensure!( + ext.ext_dtype().is::(), + "array extension dtype {} is not a WKB", + ext.ext_dtype() + ); Ok(Self { ext }) } diff --git a/vortex-geo/src/tests/wkb.rs b/vortex-geo/src/tests/wkb.rs index d031f8a34f8..4ef3d6e2ba8 100644 --- a/vortex-geo/src/tests/wkb.rs +++ b/vortex-geo/src/tests/wkb.rs @@ -22,6 +22,7 @@ use geo_types::Polygon; use geoarrow::datatypes::Crs; use geoarrow::datatypes::Metadata; use geoarrow::datatypes::WkbType; +use rstest::rstest; use vortex_array::IntoArray; use vortex_array::VortexSessionExecute; use vortex_array::arrays::ExtensionArray; @@ -129,64 +130,30 @@ fn export_field_carries_extension() -> VortexResult<()> { } /// Export materializes the WKB bytes into the requested binary-family Arrow array. -#[test] -fn exports_to_binary() -> VortexResult<()> { - let mut ctx = SESSION.create_execution_ctx(); - let (wkb_bytes, array) = wkb_extension_array()?; - - let field = Field::new("geom", DataType::Binary, false) - .with_extension_type(WkbType::new(Default::default())); - let exported = SESSION - .arrow() - .execute_arrow(array, Some(&field), &mut ctx)?; - - assert_eq!(exported.data_type(), &DataType::Binary); - let binary = exported.as_binary::(); - assert_eq!(binary.len(), 3); - for idx in 0..3 { - assert_eq!(binary.value(idx), wkb_bytes.as_slice()); - } - Ok(()) -} - -/// Export materializes the WKB bytes into the requested binary-family Arrow array. -#[test] -fn exports_to_large_binary() -> VortexResult<()> { - let mut ctx = SESSION.create_execution_ctx(); - let (wkb_bytes, array) = wkb_extension_array()?; - - let field = Field::new("geom", DataType::LargeBinary, false) - .with_extension_type(WkbType::new(Default::default())); - let exported = SESSION - .arrow() - .execute_arrow(array, Some(&field), &mut ctx)?; - - assert_eq!(exported.data_type(), &DataType::LargeBinary); - let binary = exported.as_binary::(); - assert_eq!(binary.len(), 3); - for idx in 0..3 { - assert_eq!(binary.value(idx), wkb_bytes.as_slice()); - } - Ok(()) -} - -/// Export materializes the WKB bytes into the requested binary-family Arrow array. -#[test] -fn exports_to_binary_view() -> VortexResult<()> { +#[rstest] +#[case::binary(DataType::Binary)] +#[case::large_binary(DataType::LargeBinary)] +#[case::binary_view(DataType::BinaryView)] +fn exports_to_binary_family(#[case] data_type: DataType) -> VortexResult<()> { let mut ctx = SESSION.create_execution_ctx(); let (wkb_bytes, array) = wkb_extension_array()?; - let field = Field::new("geom", DataType::BinaryView, false) + let field = Field::new("geom", data_type.clone(), false) .with_extension_type(WkbType::new(Default::default())); let exported = SESSION .arrow() .execute_arrow(array, Some(&field), &mut ctx)?; - assert_eq!(exported.data_type(), &DataType::BinaryView); - let binary = exported.as_binary_view(); - assert_eq!(binary.len(), 3); - for idx in 0..3 { - assert_eq!(binary.value(idx), wkb_bytes.as_slice()); + assert_eq!(exported.data_type(), &data_type); + let values: Vec<&[u8]> = match &data_type { + DataType::Binary => exported.as_binary::().iter().flatten().collect(), + DataType::LargeBinary => exported.as_binary::().iter().flatten().collect(), + DataType::BinaryView => exported.as_binary_view().iter().flatten().collect(), + _ => unreachable!("cases cover only the binary family"), + }; + assert_eq!(values.len(), 3); + for value in values { + assert_eq!(value, wkb_bytes.as_slice()); } Ok(()) }