Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ members = [
"vortex-scalar",
"vortex-tui",
"vortex-utils",
"vortex-vector",
"xtask",
"vortex-gpu",
]
Expand Down
10 changes: 9 additions & 1 deletion vortex-buffer/src/bit/buf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::bit::{
use crate::{Alignment, BitBufferMut, Buffer, BufferMut, ByteBuffer, buffer};

/// An immutable bitset stored as a packed byte buffer.
#[derive(Clone, Debug, Eq)]
#[derive(Debug, Clone, Eq)]
pub struct BitBuffer {
buffer: ByteBuffer,
len: usize,
Expand Down Expand Up @@ -277,6 +277,14 @@ impl BitBuffer {
self.buffer.slice(word_start..word_end)
}

/// Try to turn this `BitBuffer` into a `BitBufferMut`, consuming it.
///
/// The conversion is delegated to the underlying byte buffer's `try_into_mut`. On success the
/// mutable bit buffer keeps the same bit offset and length; on failure the original
/// `BitBuffer` is rebuilt from the returned bytes and handed back as the error value.
pub fn try_into_mut(self) -> Result<BitBufferMut, Self> {
    // Copy the scalar fields out first so that `self.buffer` can be moved below.
    let (offset, len) = (self.offset, self.len);

    self.buffer
        .try_into_mut()
        .map(|bytes| BitBufferMut::from_buffer(bytes, offset, len))
        .map_err(|bytes| BitBuffer::new_with_offset(bytes, len, offset))
}

/// Get a mutable version of this `BitBuffer` along with bit offset in the first byte.
///
/// If the caller doesn't hold the only reference to the underlying buffer, a copy is created.
Expand Down
79 changes: 79 additions & 0 deletions vortex-buffer/src/bit/buf_mut.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use arrow_buffer::bit_chunk_iterator::BitChunks;
use bitvec::view::BitView;

use crate::bit::{get_bit_unchecked, set_bit_unchecked, unset_bit_unchecked};
Expand All @@ -25,12 +26,26 @@ use crate::{BitBuffer, BufferMut, ByteBufferMut, buffer_mut};
/// ```
///
/// See also: [`BitBuffer`].
// `PartialEq` is implemented by hand for this type rather than derived.
#[derive(Debug, Clone, Eq)]
pub struct BitBufferMut {
/// Byte storage that holds the packed bits.
buffer: ByteBufferMut,
/// Bit position inside `buffer` that is added to every logical index on access.
offset: usize,
/// Logical length of the buffer, in bits.
len: usize,
}

impl PartialEq for BitBufferMut {
    /// Two buffers are equal when they have the same logical length and identical bits.
    ///
    /// The comparison runs over the logical bits as exposed by [`BitBufferMut::chunks`], so it
    /// does not depend on the bit offset of either buffer.
    fn eq(&self, other: &Self) -> bool {
        if self.len != other.len {
            return false;
        }

        let lhs = self.chunks();
        let rhs = other.chunks();

        // `iter()` yields only the complete 64-bit chunks. The trailing `len % 64` bits are
        // exposed separately through `remainder_bits()` and must be compared too; otherwise
        // buffers that differ only in their tail (which includes every buffer shorter than
        // 64 bits) would wrongly compare equal.
        lhs.iter().eq(rhs.iter()) && lhs.remainder_bits() == rhs.remainder_bits()
    }
}

impl BitBufferMut {
/// Create new bit buffer from given byte buffer and logical bit length
pub fn from_buffer(buffer: ByteBufferMut, offset: usize, len: usize) -> Self {
Expand Down Expand Up @@ -118,6 +133,13 @@ impl BitBufferMut {
unsafe { get_bit_unchecked(self.buffer.as_ptr(), self.offset + index) }
}

/// View the logical bits of this buffer as 8-byte chunks followed by a final trailer.
///
/// If you're performing operations on a single buffer, prefer [BitBuffer::unaligned_chunks]
pub fn chunks(&self) -> BitChunks<'_> {
    let bytes = self.buffer.as_slice();
    BitChunks::new(bytes, self.offset, self.len)
}

/// Get the bit capacity of the buffer.
#[inline(always)]
pub fn capacity(&self) -> usize {
Expand Down Expand Up @@ -362,6 +384,63 @@ impl BitBufferMut {
self.len += bit_len;
}

/// Splits the bit buffer into two at the given bit index.
///
/// Afterward, `self` contains bits `[0, at)`, and the returned buffer contains bits
/// `[at, len)`.
///
/// If the absolute split position (`offset + at`) falls on a byte boundary, the underlying
/// byte buffer is split with its own `split_off` and no bits are copied. Otherwise the tail
/// bits are copied one by one into a newly allocated buffer and `self` is truncated.
///
/// # Panics
///
/// Panics if `at > len`.
// NOTE(review): the byte-aligned path calls `split_off` on the byte buffer, which presumably
// enforces its own alignment precondition on the byte position — confirm for buffers whose
// alignment is larger than one byte.
pub fn split_off(&mut self, at: usize) -> Self {
    assert!(at <= self.len, "index {at} exceeds len {}", self.len);

    let tail_len = self.len - at;
    let split_bit = self.offset + at;

    // If we are splitting on a byte boundary, we can just split the byte buffer.
    if split_bit % 8 == 0 {
        let tail_bytes = self.buffer.split_off(split_bit / 8);
        self.len = at;
        return Self {
            buffer: tail_bytes,
            // The first tail bit is bit 0 of the first split-off byte, so the tail starts at
            // offset 0 regardless of the offset of `self`.
            offset: 0,
            len: tail_len,
        };
    }

    // Otherwise, we need to copy the tail bits into a new buffer.
    let mut tail = BitBufferMut::with_capacity(tail_len);
    for index in at..self.len {
        tail.append(self.value(index));
    }

    // Drop the copied bits from self.
    self.truncate(at);

    tail
}

/// Re-attaches a buffer that was previously split off from this one.
///
/// When `self` ends on a byte boundary and `other` starts at bit offset zero, the underlying
/// byte buffers are joined with their own `unsplit` and only the length is updated. If the
/// two buffers were previously contiguous and not mutated in a way that causes re-allocation
/// (i.e. `other` was created by calling `split_off` on this buffer), that is an O(1)
/// operation.
///
/// In every other case this falls back to `self.append_buffer(&other.freeze())`.
pub fn unsplit(&mut self, other: Self) {
    let ends_on_byte = (self.offset + self.len) % 8 == 0;
    let can_join_bytes = ends_on_byte && other.offset == 0;

    if !can_join_bytes {
        // Not byte-aligned: go through the generic bit-append path.
        self.append_buffer(&other.freeze());
        return;
    }

    // Aligned: the byte buffers can be joined directly.
    self.buffer.unsplit(other.buffer);
    self.len += other.len;
}

/// Freeze the buffer in its current state into an immutable `BitBuffer`.
pub fn freeze(self) -> BitBuffer {
BitBuffer::new_with_offset(self.buffer.freeze(), self.len, self.offset)
Expand Down
60 changes: 0 additions & 60 deletions vortex-buffer/src/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -447,66 +447,6 @@ impl<T> Buffer<T> {
vortex_panic!("Buffer is not aligned to requested alignment {}", alignment)
}
}

/// Align the buffer to alignment of U
pub fn align_to<U>(mut self) -> (Buffer<T>, Buffer<U>, Buffer<T>) {
let offset = self.as_ptr().align_offset(align_of::<U>());
if offset > self.len() {
(
self,
Buffer::empty_aligned(Alignment::of::<U>()),
Buffer::empty_aligned(Alignment::of::<T>()),
)
} else {
let left = self.bytes.split_to(offset);
self.length -= offset;
let (us_len, _) = self.align_to_offsets::<U>();
let trailer = self.bytes.split_off(us_len * size_of::<U>());
(
Buffer::from_bytes_aligned(left, Alignment::of::<T>()),
Buffer::from_bytes_aligned(self.bytes, Alignment::of::<U>()),
Buffer::from_bytes_aligned(trailer, Alignment::of::<T>()),
)
}
}

/// Compute the lengths of the middle (`U`) and trailing (`T`) sections for `align_to`.
///
/// Adapted from the standard library's `slice::align_to_offsets`.
///
/// Returns `(us_len, ts_len)`: the number of `U` values that fit in this buffer and the number
/// of `T` values left over after them.
fn align_to_offsets<U>(&self) -> (usize, usize) {
    // The buffer is carved into groups spanning lcm(size_of::<T>, size_of::<U>) bytes. Each
    // group replaces `t_per_group` values of `T` with `u_per_group` values of `U`:
    //
    // u_per_group = lcm / size_of::<U> = size_of::<T> / gcd(size_of::<T>, size_of::<U>)
    // t_per_group = lcm / size_of::<T> = size_of::<U> / gcd(size_of::<T>, size_of::<U>)
    //
    // For example, with T=u8 and U=u16 one U takes the place of two Ts; with
    // size_of::<T> = 16 and size_of::<U> = 24, two Us take the place of every three Ts.
    const fn gcd(mut a: usize, mut b: usize) -> usize {
        while b != 0 {
            let rem = a % b;
            a = b;
            b = rem;
        }
        a
    }

    // The const block forces compile-time evaluation even in debug builds.
    let divisor: usize = const { gcd(size_of::<T>(), size_of::<U>()) };
    let t_per_group: usize = size_of::<U>() / divisor;
    let u_per_group: usize = size_of::<T>() / divisor;

    let t_count = self.len();
    // Whole groups contribute `U`s; whatever does not fill a group stays as trailing `T`s.
    (t_count / t_per_group * u_per_group, t_count % t_per_group)
}
}

/// An iterator over Buffer elements.
Expand Down
54 changes: 54 additions & 0 deletions vortex-buffer/src/buffer_mut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,60 @@ impl<T> BufferMut<T> {
self.length += slice.len();
}

/// Splits the buffer into two at the given index.
///
/// Afterward, self contains elements `[0, at)`, and the returned buffer contains elements
/// `[at, capacity)`. It’s guaranteed that the memory does not move, that is, the address of
/// self does not change, and the address of the returned slice is at bytes after that.
///
/// This is an O(1) operation that just increases the reference count and sets a few indices.
///
/// Panics if either half would have a length that is not a multiple of the alignment.
pub fn split_off(&mut self, at: usize) -> Self {
if at > self.len() {
vortex_panic!("Cannot split buffer of length {} at {}", self.len(), at);
}

let bytes_at = at * size_of::<T>();
if !bytes_at.is_multiple_of(*self.alignment) {
vortex_panic!(
"Cannot split buffer at {}, resulting alignment is not {}",
at,
self.alignment
);
}

let new_bytes = self.bytes.split_off(bytes_at);
let new_length = self.length - at;
self.length = at;

BufferMut {
bytes: new_bytes,
length: new_length,
alignment: self.alignment,
_marker: Default::default(),
}
}

/// Re-attaches a buffer that was previously split off from this one.
///
/// If the two buffers were previously contiguous and not mutated in a way that causes
/// re-allocation (i.e. `other` was created by calling `split_off` on this buffer), this is an
/// O(1) operation that just decreases a reference count and sets a few indices.
///
/// Otherwise, this method degenerates to `self.extend_from_slice(other.as_ref())`.
///
/// # Panics
///
/// Panics if the two buffers have different alignments.
pub fn unsplit(&mut self, other: Self) {
    if self.alignment == other.alignment {
        self.bytes.unsplit(other.bytes);
        self.length += other.length;
    } else {
        vortex_panic!(
            "Cannot unsplit buffers with different alignments: {} and {}",
            self.alignment,
            other.alignment
        );
    }
}

/// Freeze the `BufferMut` into a `Buffer`.
pub fn freeze(self) -> Buffer<T> {
Buffer {
Expand Down
Loading
Loading