Skip to content
Merged
12 changes: 12 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ members = [
"vortex-scalar",
"vortex-tui",
"vortex-utils",
"vortex-vector",
"xtask",
"vortex-gpu",
]
Expand Down
49 changes: 41 additions & 8 deletions vortex-buffer/src/bit/buf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ use crate::bit::{
use crate::{Alignment, BitBufferMut, Buffer, BufferMut, ByteBuffer, buffer};

/// An immutable bitset stored as a packed byte buffer.
#[derive(Clone, Debug, Eq)]
#[derive(Debug, Clone, Eq)]
pub struct BitBuffer {
buffer: ByteBuffer,
len: usize,
offset: usize,
len: usize,
}

impl PartialEq for BitBuffer {
Expand All @@ -25,8 +25,8 @@ impl PartialEq for BitBuffer {
}

self.chunks()
.iter()
.zip(other.chunks())
.iter_padded()
.zip(other.chunks().iter_padded())
.all(|(a, b)| a == b)
}
}
Expand All @@ -48,10 +48,10 @@ impl BitBuffer {
}
}

/// Create a new `BoolBuffer` backed by a [`ByteBuffer`] with `len` bits in view, starting at the
/// given `offset` (in bits).
/// Create a new `BoolBuffer` backed by a [`ByteBuffer`] with `len` bits in view, starting at
/// the given `offset` (in bits).
///
/// Panics if the buffer is not large enough to hold `len` bits or if the offset is greater than
/// Panics if the buffer is not large enough to hold `len` bits after the offset.
pub fn new_with_offset(buffer: ByteBuffer, len: usize, offset: usize) -> Self {
assert!(
len.saturating_add(offset) <= buffer.len().saturating_mul(8),
Expand All @@ -61,8 +61,8 @@ impl BitBuffer {

Self {
buffer,
len,
offset,
len,
}
}

Expand Down Expand Up @@ -277,6 +277,14 @@ impl BitBuffer {
self.buffer.slice(word_start..word_end)
}

/// Attempt to convert this `BitBuffer` into a mutable version.
pub fn try_into_mut(self) -> Result<BitBufferMut, Self> {
match self.buffer.try_into_mut() {
Ok(buffer) => Ok(BitBufferMut::from_buffer(buffer, self.offset, self.len)),
Err(buffer) => Err(BitBuffer::new_with_offset(buffer, self.len, self.offset)),
}
}

/// Get a mutable version of this `BitBuffer` along with bit offset in the first byte.
///
/// If the caller doesn't hold the only reference to the underlying buffer, a copy is created.
Expand Down Expand Up @@ -442,4 +450,29 @@ mod tests {
}
}
}

#[test]
fn test_padded_equaltiy() {
    // One buffer with all 64 bits set, one with only the first 32 bits set.
    let all_set = BitBuffer::new_set(64);
    let half_set = BitBuffer::collect_bool(64, |idx| idx < 32);

    // Sanity-check the individual bits: the lower half agrees, the upper half differs.
    for i in 0..64 {
        if i < 32 {
            assert_eq!(
                all_set.value(i),
                half_set.value(i),
                "Bit {} should be the same",
                i
            );
        } else {
            assert_ne!(all_set.value(i), half_set.value(i), "Bit {} should differ", i);
        }
    }

    // Slices that do not cover a whole 64-bit chunk are compared via the padded remainder.
    let (low_a, low_b) = (all_set.slice(0..32), half_set.slice(0..32));
    assert_eq!(
        low_a, low_b,
        "Buffer slices with same bits should be equal (`PartialEq` needs `iter_padded()`)"
    );

    let (high_a, high_b) = (all_set.slice(32..64), half_set.slice(32..64));
    assert_ne!(
        high_a, high_b,
        "Buffer slices with different bits should not be equal (`PartialEq` needs `iter_padded()`)"
    );
}
}
81 changes: 81 additions & 0 deletions vortex-buffer/src/bit/buf_mut.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

// TODO(connor): The API of `BitBufferMut` should probably share more methods with `BitBuffer`.

use arrow_buffer::bit_chunk_iterator::BitChunks;
use bitvec::view::BitView;

use crate::bit::{get_bit_unchecked, set_bit_unchecked, unset_bit_unchecked};
Expand All @@ -25,12 +28,26 @@ use crate::{BitBuffer, BufferMut, ByteBufferMut, buffer_mut};
/// ```
///
/// See also: [`BitBuffer`].
// `PartialEq` is implemented by hand (comparing lengths and padded bit chunks), so only
// `Eq` is derived here.
#[derive(Debug, Clone, Eq)]
pub struct BitBufferMut {
/// Backing byte storage that holds the packed bits.
buffer: ByteBufferMut,
/// Bit position within `buffer` at which the logical bits begin.
offset: usize,
/// Number of logical bits in the buffer.
len: usize,
}

impl PartialEq for BitBufferMut {
fn eq(&self, other: &Self) -> bool {
if self.len != other.len {
return false;
}

self.chunks()
Comment thread
connortsui20 marked this conversation as resolved.
.iter_padded()
.zip(other.chunks().iter_padded())
.all(|(a, b)| a == b)
}
}

impl BitBufferMut {
/// Create a new bit buffer from the given byte buffer, bit offset, and logical bit length.
pub fn from_buffer(buffer: ByteBufferMut, offset: usize, len: usize) -> Self {
Expand Down Expand Up @@ -118,6 +135,13 @@ impl BitBufferMut {
unsafe { get_bit_unchecked(self.buffer.as_ptr(), self.offset + index) }
}

/// Returns a [`BitChunks`] view over the bits of this buffer.
///
/// The view exposes the underlying bytes as 8-byte chunks followed by a final trailer.
///
/// If you're performing operations on a single buffer, prefer [BitBuffer::unaligned_chunks]
pub fn chunks(&self) -> BitChunks<'_> {
    let bytes = self.buffer.as_slice();
    BitChunks::new(bytes, self.offset, self.len)
}

/// Get the bit capacity of the buffer.
#[inline(always)]
pub fn capacity(&self) -> usize {
Expand Down Expand Up @@ -362,6 +386,63 @@ impl BitBufferMut {
self.len += bit_len;
}

/// Splits the bit buffer into two at the given bit index.
///
/// Afterward, `self` contains bits `[0, at)`, and the returned buffer contains bits
/// `[at, len)`.
///
/// Unlike bytes, if the split position is not on a byte boundary of the underlying buffer,
/// this operation copies the tail bits into a newly allocated buffer and truncates `self`.
/// When the split position is on a byte boundary, the underlying bytes are split instead
/// and no bits are copied.
///
/// # Panics
///
/// Panics if `at` is greater than the length of the buffer.
///
/// NOTE(review): the byte-aligned path delegates to the byte buffer's `split_off`, which
/// presumably also panics when the split byte is not a multiple of that buffer's alignment
/// — confirm against `BufferMut::split_off`.
pub fn split_off(&mut self, at: usize) -> Self {
    assert!(at <= self.len, "index {at} exceeds len {}", self.len);

    let tail_len = self.len - at;
    // Physical bit position of the split point within the underlying bytes.
    let split_bit = self.offset + at;

    // If we are splitting on a byte boundary, we can just split the underlying bytes.
    if split_bit % 8 == 0 {
        let tail_bytes = self.buffer.split_off(split_bit / 8);
        self.len = at;
        return Self {
            buffer: tail_bytes,
            // The tail's bytes begin exactly at the split point, so its first logical bit is
            // bit 0 of its first byte. Reusing `self.offset` here would shift every bit of
            // the tail whenever `self` has a non-zero offset.
            offset: 0,
            len: tail_len,
        };
    }

    // Otherwise, we need to copy the tail bits into a new buffer.
    //
    // NOTE(review): copying bit by bit is not fast. A reviewer suggested slicing the
    // buffer and building the tail from an iterator instead; left as a follow-up.
    let mut tail = BitBufferMut::with_capacity(tail_len);
    for idx in at..self.len {
        tail.append(self.value(idx));
    }

    // Truncate self to the split position.
    self.truncate(at);

    tail
}

/// Absorbs a mutable bit buffer that was previously split off.
///
/// If the two buffers were previously contiguous and not mutated in a way that causes
/// re-allocation, i.e. if `other` was created by calling `split_off` on this buffer, then
/// this is an O(1) operation that just decreases a reference count and sets a few indices.
///
/// The fast path is only taken when `self` ends on a byte boundary and `other` starts at bit
/// offset zero. Otherwise, `other` is frozen and appended via `append_buffer`.
pub fn unsplit(&mut self, other: Self) {
    let ends_on_byte_boundary = (self.offset + self.len) % 8 == 0;

    if !(ends_on_byte_boundary && other.offset == 0) {
        // Not byte-aligned: fall back to appending the bits of `other`.
        self.append_buffer(&other.freeze());
        return;
    }

    // Byte-aligned on both sides: the underlying byte buffers can be joined directly.
    self.buffer.unsplit(other.buffer);
    self.len += other.len;
}

/// Freeze the buffer in its current state into an immutable `BitBuffer`.
pub fn freeze(self) -> BitBuffer {
BitBuffer::new_with_offset(self.buffer.freeze(), self.len, self.offset)
Expand Down
60 changes: 0 additions & 60 deletions vortex-buffer/src/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -447,66 +447,6 @@ impl<T> Buffer<T> {
vortex_panic!("Buffer is not aligned to requested alignment {}", alignment)
}
}

/// Align the buffer to alignment of U
///
/// Consumes the buffer and returns three buffers `(prefix, middle, suffix)`: a leading
/// `Buffer<T>`, a `Buffer<U>` tagged with the alignment of `U`, and a trailing `Buffer<T>`.
///
/// If the offset needed to reach an address aligned for `U` is larger than the buffer length,
/// the whole buffer is returned as the prefix, and the middle and suffix are empty.
pub fn align_to<U>(mut self) -> (Buffer<T>, Buffer<U>, Buffer<T>) {
// Distance from the start of the buffer to the first address aligned for `U`.
// NOTE(review): `align_offset` on a typed pointer counts elements of the pointee type, while
// `split_to` below is applied to `self.bytes` — confirm both use the same unit when
// `size_of::<T>() != 1`.
let offset = self.as_ptr().align_offset(align_of::<U>());
if offset > self.len() {
// No aligned position inside the buffer: everything stays in the prefix.
(
self,
Buffer::empty_aligned(Alignment::of::<U>()),
Buffer::empty_aligned(Alignment::of::<T>()),
)
} else {
// Peel off the unaligned prefix; `self.bytes` now starts at the aligned position.
let left = self.bytes.split_to(offset);
self.length -= offset;
// How many `U`s fit in what remains (computed from the updated length).
let (us_len, _) = self.align_to_offsets::<U>();
// Everything past the last whole `U` becomes the trailer.
let trailer = self.bytes.split_off(us_len * size_of::<U>());
(
Buffer::from_bytes_aligned(left, Alignment::of::<T>()),
Buffer::from_bytes_aligned(self.bytes, Alignment::of::<U>()),
Buffer::from_bytes_aligned(trailer, Alignment::of::<T>()),
)
}
}

/// Calculates the lengths of the middle and trailing slices for `align_to`.
///
/// Adapted from the standard library's `slice::align_to_offsets`.
///
/// Returns `(us_len, ts_len)`: the number of `U`s that fit into the buffer, and the number of
/// `T`s that remain in the trailing slice.
fn align_to_offsets<U>(&self) -> (usize, usize) {
    // The idea is to find the smallest group of `T`s that is exactly covered by a whole
    // number of `U`s, and how many `U`s that group holds.
    //
    // With T=u8 and U=u16, one `U` covers two `T`s. With size_of::<T> = 16 and
    // size_of::<U> = 24, two `U`s cover every three `T`s.
    //
    // In general:
    //
    // Us = lcm(size_of::<T>, size_of::<U>) / size_of::<U>
    //    = size_of::<T> / gcd(size_of::<T>, size_of::<U>)
    // Ts = lcm(size_of::<T>, size_of::<U>) / size_of::<T>
    //    = size_of::<U> / gcd(size_of::<T>, size_of::<U>)
    //
    // All of this is constant-evaluated, so its performance does not matter.
    const fn gcd(mut a: usize, mut b: usize) -> usize {
        while b != 0 {
            let rem = a % b;
            a = b;
            b = rem;
        }
        a
    }

    // The explicit const block forces constant evaluation even in debug builds.
    let divisor: usize = const { gcd(size_of::<T>(), size_of::<U>()) };
    let ts_per_group: usize = size_of::<U>() / divisor;
    let us_per_group: usize = size_of::<T>() / divisor;

    let t_count = self.len();
    // Number of `U`s that fit into the whole groups of `T`s.
    let us_len = t_count / ts_per_group * us_per_group;
    // Number of `T`s left over in the trailing slice.
    let ts_len = t_count % ts_per_group;
    (us_len, ts_len)
}
}

/// An iterator over Buffer elements.
Expand Down
54 changes: 54 additions & 0 deletions vortex-buffer/src/buffer_mut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,60 @@ impl<T> BufferMut<T> {
self.length += slice.len();
}

/// Splits the buffer into two at the given index.
///
/// Afterward, self contains elements `[0, at)`, and the returned buffer contains elements
/// `[at, capacity)`. It’s guaranteed that the memory does not move, that is, the address of
/// self does not change, and the address of the returned slice is at bytes after that.
///
/// This is an O(1) operation that just increases the reference count and sets a few indices.
///
/// Panics if either half would have a length that is not a multiple of the alignment.
pub fn split_off(&mut self, at: usize) -> Self {
if at > self.len() {
vortex_panic!("Cannot split buffer of length {} at {}", self.len(), at);
}

let bytes_at = at * size_of::<T>();
if !bytes_at.is_multiple_of(*self.alignment) {
vortex_panic!(
"Cannot split buffer at {}, resulting alignment is not {}",
at,
self.alignment
);
}

let new_bytes = self.bytes.split_off(bytes_at);
let new_length = self.length - at;
self.length = at;

BufferMut {
bytes: new_bytes,
length: new_length,
alignment: self.alignment,
_marker: Default::default(),
}
}

/// Absorbs a mutable buffer that was previously split off.
///
/// If the two buffers were previously contiguous and not mutated in a way that causes
/// re-allocation, i.e. if `other` was created by calling `split_off` on this buffer, then
/// this is an O(1) operation that just decreases a reference count and sets a few indices.
///
/// Otherwise, this method degenerates to `self.extend_from_slice(other.as_ref())`.
///
/// # Panics
///
/// Panics if the two buffers have different alignments.
pub fn unsplit(&mut self, other: Self) {
    if self.alignment == other.alignment {
        self.bytes.unsplit(other.bytes);
        self.length += other.length;
    } else {
        vortex_panic!(
            "Cannot unsplit buffers with different alignments: {} and {}",
            self.alignment,
            other.alignment
        );
    }
}

/// Freeze the `BufferMut` into a `Buffer`.
pub fn freeze(self) -> Buffer<T> {
Buffer {
Expand Down
18 changes: 13 additions & 5 deletions vortex-dtype/src/nullability.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,21 @@ pub enum Nullability {
}

impl Nullability {
/// A self-describing displayed form.
/// Returns `true` if the nullability is [`Nullable`](Self::Nullable), otherwise returns
/// `false`.
///
/// The usual Display renders [Nullability::NonNullable] as the empty string.
pub fn verbose_display(&self) -> impl Display {
/// # Examples
///
/// ```
/// use vortex_dtype::Nullability::*;
///
/// assert!(!NonNullable.is_nullable());
/// assert!(Nullable.is_nullable());
/// ```
pub fn is_nullable(&self) -> bool {
match self {
Nullability::NonNullable => "NonNullable",
Nullability::Nullable => "Nullable",
Nullability::NonNullable => false,
Nullability::Nullable => true,
}
}
}
Expand Down
Loading
Loading