Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 29 additions & 8 deletions vortex-buffer/src/bit/buf_mut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ impl BitBufferMut {
}
}

/// Consumes the buffer and returns the underlying byte buffer.
///
/// NOTE(review): only the byte storage (`self.buffer`) is returned; other state tracked by this
/// type, such as the bit offset, is discarded along with `self` — confirm callers do not need it.
pub fn into_inner(self) -> ByteBufferMut {
self.buffer
}

/// Create a new mutable buffer with requested `len` and all bits set to `true`.
pub fn new_set(len: usize) -> Self {
Self {
Expand Down Expand Up @@ -205,20 +210,24 @@ impl BitBufferMut {

/// Set the bit at `index` to `true` without checking bounds.
///
/// Note: Do not call this in a tight loop. Prefer to use [`set_bit_unchecked`].
///
/// # Safety
///
/// The caller must ensure that `index` does not exceed the largest bit index in the backing buffer.
pub unsafe fn set_unchecked(&mut self, index: usize) {
unsafe fn set_unchecked(&mut self, index: usize) {
// SAFETY: checked by caller
unsafe { set_bit_unchecked(self.buffer.as_mut_ptr(), self.offset + index) }
}

/// Unset the bit at `index` without checking bounds.
///
/// Note: Do not call this in a tight loop. Prefer to use [`unset_bit_unchecked`].
///
/// # Safety
///
/// The caller must ensure that `index` does not exceed the largest bit index in the backing buffer.
pub unsafe fn unset_unchecked(&mut self, index: usize) {
unsafe fn unset_unchecked(&mut self, index: usize) {
// SAFETY: checked by caller
unsafe { unset_bit_unchecked(self.buffer.as_mut_ptr(), self.offset + index) }
}
Expand All @@ -229,6 +238,7 @@ impl BitBufferMut {
///
/// - `new_len` must be less than or equal to [`capacity()`](Self::capacity)
/// - The elements at `old_len..new_len` must be initialized
#[inline(always)]
pub unsafe fn set_len(&mut self, new_len: usize) {
debug_assert!(
new_len <= self.capacity(),
Expand Down Expand Up @@ -470,6 +480,11 @@ impl BitBufferMut {
pub fn as_mut_slice(&mut self) -> &mut [u8] {
self.buffer.as_mut_slice()
}

/// Returns a raw mutable pointer to the internal buffer.
///
/// The pointer is obtained directly from the underlying byte buffer, so no bit offset is
/// applied to it. NOTE(review): callers that pass this pointer to bit-level helpers presumably
/// have to account for the buffer's bit offset themselves, the way `set_unchecked` adds
/// `self.offset` to the index — confirm at call sites.
pub fn as_mut_ptr(&mut self) -> *mut u8 {
self.buffer.as_mut_ptr()
}
}

impl Default for BitBufferMut {
Expand Down Expand Up @@ -511,14 +526,20 @@ impl FromIterator<bool> for BitBufferMut {
fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
let mut iter = iter.into_iter();

// Note that these hints might be incorrect.
let (lower_bound, upper_bound_opt) = iter.size_hint();
let capacity = upper_bound_opt.unwrap_or(lower_bound);
// Since we do not know the length of the iterator, we can only guess how much memory we
// need to reserve. Note that these hints may be inaccurate.
let (lower_bound, _) = iter.size_hint();

let mut buf = BitBufferMut::new_unset(capacity);
// We choose not to use the optional upper bound size hint to match the standard library.

// Initialize all bits to 0 with the given length. By doing this, we only need to set bits
// that are true (and this is faster from benchmarks).
let mut buf = BitBufferMut::new_unset(lower_bound);
assert_eq!(buf.offset, 0);

// Directly write within our known capacity.
for i in 0..capacity {
let ptr = buf.buffer.as_mut_ptr();
for i in 0..lower_bound {
let Some(v) = iter.next() else {
// SAFETY: We are definitely under the capacity and all values are already
// initialized from `new_unset`.
Expand All @@ -528,7 +549,7 @@ impl FromIterator<bool> for BitBufferMut {

if v {
// SAFETY: We have ensured that we are within the capacity.
unsafe { buf.set_unchecked(i) }
unsafe { set_bit_unchecked(ptr, i) }
}
}

Expand Down
7 changes: 4 additions & 3 deletions vortex-buffer/src/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,10 @@ impl<T> Buffer<T> {
/// Create a buffer with values from the TrustedLen iterator.
/// Should be preferred over `from_iter` when the iterator is known to be `TrustedLen`.
pub fn from_trusted_len_iter<I: TrustedLen<Item = T>>(iter: I) -> Self {
let (_, high) = iter.size_hint();
let mut buffer =
BufferMut::with_capacity(high.vortex_expect("TrustedLen iterator has no upper bound"));
let (_, upper_bound) = iter.size_hint();
let mut buffer = BufferMut::with_capacity(
upper_bound.vortex_expect("TrustedLen iterator has no upper bound"),
);
buffer.extend_trusted(iter);
buffer.freeze()
}
Expand Down
13 changes: 5 additions & 8 deletions vortex-buffer/src/buffer_mut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -484,14 +484,11 @@ impl<T> BufferMut<T> {
fn extend_iter(&mut self, mut iter: impl Iterator<Item = T>) {
// Since we do not know the length of the iterator, we can only guess how much memory we
// need to reserve. Note that these hints may be inaccurate.
let (lower_bound, upper_bound_opt) = iter.size_hint();

// In the case that the upper bound is adversarial, we put a hard limit on the amount of
// memory we reserve (and the OS should handle the rest with zero pages).
let reserve_amount = upper_bound_opt
.unwrap_or(lower_bound)
.min(i32::MAX as usize);
self.reserve(reserve_amount);
let (lower_bound, _) = iter.size_hint();

// We choose not to use the optional upper bound size hint to match the standard library.

self.reserve(lower_bound);

let unwritten = self.capacity() - self.len();

Expand Down
8 changes: 4 additions & 4 deletions vortex-buffer/src/trusted_len.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,17 +72,17 @@ pub trait TrustedLenExt: Iterator + Sized {
///
/// The caller must guarantee that the iterator does indeed have an exact length.
unsafe fn trusted_len(self) -> TrustedLenAdapter<Self> {
let (lower, maybe_upper) = self.size_hint();
if let Some(upper) = maybe_upper {
let (lower_bound, upper_bound_opt) = self.size_hint();
if let Some(upper_bound) = upper_bound_opt {
assert_eq!(
lower, upper,
lower_bound, upper_bound,
"TrustedLenExt: iterator size hints must match if upper bound is given"
);
}

TrustedLenAdapter {
inner: self,
len: lower,
len: lower_bound,
#[cfg(debug_assertions)]
count: 0,
}
Expand Down
10 changes: 6 additions & 4 deletions vortex-mask/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use std::sync::{Arc, OnceLock};

use itertools::Itertools;
pub use mask_mut::*;
use vortex_buffer::{BitBuffer, BitBufferMut};
use vortex_buffer::{BitBuffer, BitBufferMut, set_bit_unchecked};
use vortex_error::{VortexResult, vortex_panic};

/// Represents a set of values that are all included, all excluded, or some mixture of both.
Expand Down Expand Up @@ -601,11 +601,13 @@ impl Mask {
let existing_buffer = mask_values.bit_buffer();

let mut new_buffer_builder = BitBufferMut::new_unset(mask_values.len());
debug_assert!(limit < mask_values.len());

let ptr = new_buffer_builder.as_mut_ptr();
for index in existing_buffer.set_indices().take(limit) {
unsafe {
new_buffer_builder.set_unchecked(index);
}
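// SAFETY: `index` is yielded by `existing_buffer.set_indices()`, so it is a valid bit index
// of the existing buffer. The new buffer was created with `mask_values.len()` bits (assumed
// to equal the existing buffer's length), therefore `index` must be within the bounds of the
// bit buffer. Note that `limit` only caps how many indices are taken, not their values.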
unsafe { set_bit_unchecked(ptr, index) }
}

Self::from(new_buffer_builder.freeze())
Expand Down
15 changes: 8 additions & 7 deletions vortex-vector/src/bool/from_iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,28 +26,29 @@ impl FromIterator<Option<bool>> for BoolVectorMut {
I: IntoIterator<Item = Option<bool>>,
{
let iter = iter.into_iter();
// Since we do not know the length of the iterator, we can only guess how much memory we
// need to reserve. Note that these hints may be inaccurate.
let (lower_bound, _) = iter.size_hint();

let mut bits = Vec::with_capacity(lower_bound);
// We choose not to use the optional upper bound size hint to match the standard library.

let mut bits = BitBufferMut::with_capacity(lower_bound);
let mut validity = MaskMut::with_capacity(lower_bound);

for opt_val in iter {
match opt_val {
Some(val) => {
bits.push(val);
bits.append(val);
validity.append_n(true, 1);
}
None => {
bits.push(false); // Value doesn't matter for invalid entries.
bits.append(false); // Value doesn't matter for invalid entries.
validity.append_n(false, 1);
}
}
}

BoolVectorMut {
bits: BitBufferMut::from_iter(bits),
validity,
}
BoolVectorMut { bits, validity }
}
}

Expand Down
11 changes: 6 additions & 5 deletions vortex-vector/src/primitive/from_iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,13 @@ impl<T: NativePType> FromIterator<Option<T>> for PVectorMut<T> {
I: IntoIterator<Item = Option<T>>,
{
let iter = iter.into_iter();
// Since we do not know the length of the iterator, we can only guess how much memory we
// need to reserve. Note that these hints may be inaccurate.
let (lower_bound, _) = iter.size_hint();

let mut elements = Vec::with_capacity(lower_bound);
// We choose not to use the optional upper bound size hint to match the standard library.

let mut elements = BufferMut::with_capacity(lower_bound);
let mut validity = MaskMut::with_capacity(lower_bound);

for opt_val in iter {
Expand All @@ -45,10 +49,7 @@ impl<T: NativePType> FromIterator<Option<T>> for PVectorMut<T> {
}
}

PVectorMut {
elements: BufferMut::from_iter(elements),
validity,
}
PVectorMut { elements, validity }
}
}

Expand Down
Loading