Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions parquet/src/arrow/record_reader/definition_levels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ impl PackedDecoder {
self.packed_offset = 0;
self.packed_count = match encoding {
Encoding::RLE => 0,
#[allow(deprecated)]
Encoding::BIT_PACKED => data.len() * 8,
_ => unreachable!("invalid level encoding: {}", encoding),
};
Expand Down
17 changes: 16 additions & 1 deletion parquet/src/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -256,10 +256,21 @@ pub enum Encoding {
/// Usable for definition/repetition levels encoding and boolean values.
RLE,

/// Bit packed encoding.
/// **Deprecated** Bit-packed encoding.
///
/// This can only be used if the data has a known max width.
/// Usable for definition/repetition levels encoding.
///
/// There are compatibility issues with files using this encoding.
/// The parquet standard specifies the bits to be packed starting from the
/// most-significant bit, several implementations do not follow this bit order.
/// Several other implementations also have issues reading this encoding
/// because of incorrect assumptions about the length of the encoded data.
///
/// The RLE/bit-packing hybrid is more cpu and memory efficient and should be used instead.
#[deprecated(
note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead"
)]
BIT_PACKED,

/// Delta encoding for integers, either INT32 or INT64.
Expand Down Expand Up @@ -301,6 +312,7 @@ impl FromStr for Encoding {
"PLAIN" | "plain" => Ok(Encoding::PLAIN),
"PLAIN_DICTIONARY" | "plain_dictionary" => Ok(Encoding::PLAIN_DICTIONARY),
"RLE" | "rle" => Ok(Encoding::RLE),
#[allow(deprecated)]
"BIT_PACKED" | "bit_packed" => Ok(Encoding::BIT_PACKED),
"DELTA_BINARY_PACKED" | "delta_binary_packed" => Ok(Encoding::DELTA_BINARY_PACKED),
"DELTA_LENGTH_BYTE_ARRAY" | "delta_length_byte_array" => {
Expand Down Expand Up @@ -910,6 +922,7 @@ impl TryFrom<parquet::Encoding> for Encoding {
parquet::Encoding::PLAIN => Encoding::PLAIN,
parquet::Encoding::PLAIN_DICTIONARY => Encoding::PLAIN_DICTIONARY,
parquet::Encoding::RLE => Encoding::RLE,
#[allow(deprecated)]
parquet::Encoding::BIT_PACKED => Encoding::BIT_PACKED,
parquet::Encoding::DELTA_BINARY_PACKED => Encoding::DELTA_BINARY_PACKED,
parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY => Encoding::DELTA_LENGTH_BYTE_ARRAY,
Expand All @@ -927,6 +940,7 @@ impl From<Encoding> for parquet::Encoding {
Encoding::PLAIN => parquet::Encoding::PLAIN,
Encoding::PLAIN_DICTIONARY => parquet::Encoding::PLAIN_DICTIONARY,
Encoding::RLE => parquet::Encoding::RLE,
#[allow(deprecated)]
Encoding::BIT_PACKED => parquet::Encoding::BIT_PACKED,
Encoding::DELTA_BINARY_PACKED => parquet::Encoding::DELTA_BINARY_PACKED,
Encoding::DELTA_LENGTH_BYTE_ARRAY => parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
Expand Down Expand Up @@ -1114,6 +1128,7 @@ impl str::FromStr for LogicalType {
}

#[cfg(test)]
#[allow(deprecated)] // allow BIT_PACKED encoding for the whole test module
mod tests {
use super::*;

Expand Down
1 change: 1 addition & 0 deletions parquet/src/bin/parquet-layout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ fn encoding(encoding: parquet::format::Encoding) -> &'static str {
Ok(Encoding::PLAIN) => "plain",
Ok(Encoding::PLAIN_DICTIONARY) => "plain_dictionary",
Ok(Encoding::RLE) => "rle",
#[allow(deprecated)]
Ok(Encoding::BIT_PACKED) => "bit_packed",
Ok(Encoding::DELTA_BINARY_PACKED) => "delta_binary_packed",
Ok(Encoding::DELTA_LENGTH_BYTE_ARRAY) => "delta_length_byte_array",
Expand Down
1 change: 1 addition & 0 deletions parquet/src/column/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,7 @@ fn parse_v1_level(
buf.slice(i32_size..i32_size + data_size),
))
}
#[allow(deprecated)]
Encoding::BIT_PACKED => {
let bit_width = num_required_bits(max_level as u64);
let num_bytes = ceil(num_buffered_values as usize * bit_width as usize, 8);
Expand Down
1 change: 1 addition & 0 deletions parquet/src/column/reader/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ impl LevelDecoder {
decoder.set_data(data);
Self::Rle(decoder)
}
#[allow(deprecated)]
Encoding::BIT_PACKED => Self::Packed(BitReader::new(data), bit_width),
_ => unreachable!("invalid level encoding: {}", encoding),
}
Expand Down
1 change: 1 addition & 0 deletions parquet/src/encodings/decoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1136,6 +1136,7 @@ mod tests {
);

// unsupported
#[allow(deprecated)]
create_and_check_decoder::<Int32Type>(
Encoding::BIT_PACKED,
Some(nyi_err!("Encoding BIT_PACKED is not supported")),
Expand Down
1 change: 1 addition & 0 deletions parquet/src/encodings/encoding/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,7 @@ mod tests {
);

// unsupported
#[allow(deprecated)]
create_and_check_encoder::<Int32Type>(
Encoding::BIT_PACKED,
Some(nyi_err!("Encoding BIT_PACKED is not supported")),
Expand Down
2 changes: 2 additions & 0 deletions parquet/src/encodings/levels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ pub fn max_buffer_size(
let bit_width = num_required_bits(max_level as u64);
match encoding {
Encoding::RLE => RleEncoder::max_buffer_size(bit_width, num_buffered_values),
#[allow(deprecated)]
Encoding::BIT_PACKED => ceil(num_buffered_values * bit_width as usize, 8),
_ => panic!("Unsupported encoding type {encoding}"),
}
Expand Down Expand Up @@ -66,6 +67,7 @@ impl LevelEncoder {
buffer.extend_from_slice(&[0; 4]);
LevelEncoder::Rle(RleEncoder::new_from_buf(bit_width, buffer))
}
#[allow(deprecated)]
Encoding::BIT_PACKED => {
// Here we set full byte buffer without adjusting for num_buffered_values,
// because byte buffer will already be allocated with size from
Expand Down
4 changes: 3 additions & 1 deletion parquet/src/file/serialized_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -963,7 +963,9 @@ mod tests {
assert_eq!(num_values, 8);
assert_eq!(encoding, Encoding::PLAIN_DICTIONARY);
assert_eq!(def_level_encoding, Encoding::RLE);
assert_eq!(rep_level_encoding, Encoding::BIT_PACKED);
#[allow(deprecated)]
let expected_rep_level_encoding = Encoding::BIT_PACKED;
assert_eq!(rep_level_encoding, expected_rep_level_encoding);
assert!(statistics.is_none());
true
}
Expand Down