From 46f0b551d5d45470afe2896f9ff426c08f4bfbec Mon Sep 17 00:00:00 2001 From: Mikhail Kot Date: Thu, 4 Jun 2026 10:35:08 +0100 Subject: [PATCH] use scanbuilder::new Signed-off-by: Mikhail Kot --- vortex-array/src/expr/exprs.rs | 13 +++-- vortex-bench/src/random_access/take.rs | 6 +- vortex-file/src/file.rs | 79 +++++++++++++++++++------- vortex-file/src/open.rs | 29 +++++++--- 4 files changed, 94 insertions(+), 33 deletions(-) diff --git a/vortex-array/src/expr/exprs.rs b/vortex-array/src/expr/exprs.rs index dd7e2bf2530..73f4991d05c 100644 --- a/vortex-array/src/expr/exprs.rs +++ b/vortex-array/src/expr/exprs.rs @@ -4,6 +4,7 @@ //! Factory functions for creating [`Expression`]s from scalar function vtables. use std::sync::Arc; +use std::sync::LazyLock; use vortex_error::VortexExpect; use vortex_error::vortex_panic; @@ -52,20 +53,24 @@ use crate::scalar_fn::fns::variant_get::VariantGetOptions; use crate::scalar_fn::fns::variant_get::VariantPath; use crate::scalar_fn::fns::zip::Zip; -// ---- Root ---- +static ROOT: LazyLock = LazyLock::new(|| { + Root.try_new_expr(EmptyOptions, vec![]) + .vortex_expect("Creating root() shouldn't fail") +}); /// Creates an expression that references the root scope. /// /// Returns the entire input array as passed to the expression evaluator. /// This is commonly used as the starting point for field access and other operations. pub fn root() -> Expression { - Root.try_new_expr(EmptyOptions, vec![]) - .vortex_expect("Failed to create Root expression") + ROOT.clone() } /// Return whether the expression is a root expression. pub fn is_root(expr: &Expression) -> bool { - expr.is::() + // root doesn't have any children, and scalar_fns have distinct ids + // so we should almost always hit this eq check + (expr.scalar_fn().id() == ROOT.scalar_fn().id()) || expr.is::() } // ---- Literal ---- diff --git a/vortex-bench/src/random_access/take.rs b/vortex-bench/src/random_access/take.rs index 147a8cf347e..e86e5d576d6 100644 --- a/vortex-bench/src/random_access/take.rs +++ b/vortex-bench/src/random_access/take.rs @@ -48,7 +48,11 @@ impl VortexRandomAccessor { name: impl Into, format: Format, ) -> anyhow::Result { - let file = SESSION.open_options().open_path(path.as_ref()).await?; + let file = SESSION + .open_options() + .with_layout_reader_cache() + .open_path(path.as_ref()) + .await?; Ok(Self { name: name.into(), format, diff --git a/vortex-file/src/file.rs b/vortex-file/src/file.rs index 4e5a9e99049..ded986f6210 100644 --- a/vortex-file/src/file.rs +++ b/vortex-file/src/file.rs @@ -8,6 +8,7 @@ use std::ops::Range; use std::sync::Arc; +use std::sync::OnceLock; use itertools::Itertools; use vortex_array::ArrayRef; @@ -51,6 +52,29 @@ pub struct VortexFile { segment_source: Arc, /// The Vortex session used to open this file. session: VortexSession, + /// None id LayoutReader caching is turned off + layout_reader_cache: Option>>, +} + +fn layout_reader( + segment_source: Arc, + footer: &Footer, + session: &VortexSession, +) -> VortexResult> { + let root_reader = footer + .layout() + // TODO(ngates): we may want to allow the user pass in a name here? + .new_reader("".into(), segment_source, session, &Default::default())?; + + Ok(if let Some(stats) = footer.statistics().cloned() { + Arc::new(FileStatsLayoutReader::new( + root_reader, + stats, + session.clone(), + )) + } else { + root_reader + }) } impl VortexFile { @@ -64,6 +88,17 @@ impl VortexFile { footer, segment_source, session, + layout_reader_cache: None, + } + } + + /// Enable layout reader caching + pub fn with_caching(self) -> Self { + Self { + footer: self.footer, + segment_source: self.segment_source, + session: self.session, + layout_reader_cache: Some(OnceLock::new()), } } @@ -106,28 +141,30 @@ impl VortexFile { /// /// Wraps the root layout in a [`FileStatsLayoutReader`] if file stats are available. pub fn layout_reader(&self) -> VortexResult> { - let segment_source = self.segment_source(); - - let root_reader = self - .footer - .layout() - // TODO(ngates): we may want to allow the user pass in a name here? - .new_reader( - "".into(), - segment_source, + match &self.layout_reader_cache { + None => layout_reader( + Arc::clone(&self.segment_source), + &self.footer, &self.session, - &Default::default(), - )?; - - Ok(if let Some(stats) = self.file_stats().cloned() { - Arc::new(FileStatsLayoutReader::new( - root_reader, - stats, - self.session.clone(), - )) - } else { - root_reader - }) + ), + Some(reader) => { + // get_or_try_init is unstable + if let Some(val) = reader.get() { + Ok(Arc::clone(val)) + } else { + let inner = layout_reader( + Arc::clone(&self.segment_source), + &self.footer, + &self.session, + )?; + Ok(if let Err(val) = reader.set(Arc::clone(&inner)) { + val + } else { + inner + }) + } + } + } } /// Create a [`DataSource`](vortex_scan::DataSource) from this file for scanning. diff --git a/vortex-file/src/open.rs b/vortex-file/src/open.rs index 8fb0f22827e..c1793b7b517 100644 --- a/vortex-file/src/open.rs +++ b/vortex-file/src/open.rs @@ -60,6 +60,8 @@ pub struct VortexOpenOptions { metrics_registry: Option>, /// Default labels applied to all the file's metrics labels: Vec