Skip to content

Commit 56af7bf

Browse files
authored
Merge branch 'master' into dicts
2 parents a710b22 + fb92a10 commit 56af7bf

File tree

12 files changed

+375
-249
lines changed

12 files changed

+375
-249
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "ruzstd"
3-
version = "0.8.0"
3+
version = "0.8.1"
44
authors = ["Moritz Borcherding <moritz.borcherding@web.de>"]
55
edition = "2018"
66
license = "MIT"

src/bin/zstd.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,20 @@ fn main() {
183183
},
184184
start_instant.elapsed().as_millis()
185185
);
186+
187+
println!("Check against source file. Decoding...");
188+
let mut decoded = Vec::with_capacity(input_len);
189+
ruzstd::decoding::FrameDecoder::new()
190+
.decode_all_to_vec(&output, &mut decoded)
191+
.unwrap();
192+
println!("Decoded without error");
193+
assert_eq!(decoded.len(), input_len);
194+
println!("Decoded length is correct, now check against file contents file");
195+
let input = std::fs::read(&path).unwrap();
196+
assert_eq!(decoded.len(), input.len());
197+
assert!(decoded == input);
198+
println!("Checks completed");
199+
186200
output.clear();
187201
encoder.set_drain(output);
188202
}

src/decoding/errors.rs

Lines changed: 74 additions & 110 deletions
Large diffs are not rendered by default.

src/encoding/blocks/compressed.rs

Lines changed: 164 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use crate::{
44
bit_io::BitWriter,
55
encoding::frame_compressor::CompressState,
66
encoding::{Matcher, Sequence},
7-
fse::fse_encoder::{default_ll_table, default_ml_table, default_of_table, FSETable, State},
7+
fse::fse_encoder::{build_table_from_data, FSETable, State},
88
huff0::huff0_encoder,
99
};
1010

@@ -50,71 +50,176 @@ pub fn compress_block<M: Matcher>(state: &mut CompressState<M>, output: &mut Vec
5050
} else {
5151
encode_seqnum(sequences.len(), &mut writer);
5252

53-
// use standard FSE tables
54-
writer.write_bits(0u8, 8);
53+
// Choose the tables
54+
// TODO store previously used tables
55+
let ll_mode = choose_table(
56+
state.fse_tables.ll_previous.as_ref(),
57+
&state.fse_tables.ll_default,
58+
sequences.iter().map(|seq| encode_literal_length(seq.ll).0),
59+
9,
60+
);
61+
let ml_mode = choose_table(
62+
state.fse_tables.ml_previous.as_ref(),
63+
&state.fse_tables.ml_default,
64+
sequences.iter().map(|seq| encode_match_len(seq.ml).0),
65+
9,
66+
);
67+
let of_mode = choose_table(
68+
state.fse_tables.of_previous.as_ref(),
69+
&state.fse_tables.of_default,
70+
sequences.iter().map(|seq| encode_offset(seq.of).0),
71+
8,
72+
);
5573

56-
let ll_table: FSETable = default_ll_table();
57-
let ml_table: FSETable = default_ml_table();
58-
let of_table: FSETable = default_of_table();
59-
60-
let sequence = sequences[sequences.len() - 1];
61-
let (ll_code, ll_add_bits, ll_num_bits) = encode_literal_length(sequence.ll);
62-
let (of_code, of_add_bits, of_num_bits) = encode_offset(sequence.of);
63-
let (ml_code, ml_add_bits, ml_num_bits) = encode_match_len(sequence.ml);
64-
let mut ll_state: &State = ll_table.start_state(ll_code);
65-
let mut ml_state: &State = ml_table.start_state(ml_code);
66-
let mut of_state: &State = of_table.start_state(of_code);
67-
68-
writer.write_bits(ll_add_bits, ll_num_bits);
69-
writer.write_bits(ml_add_bits, ml_num_bits);
70-
writer.write_bits(of_add_bits, of_num_bits);
71-
72-
// encode backwards so the decoder reads the first sequence first
73-
if sequences.len() > 1 {
74-
for sequence in (0..=sequences.len() - 2).rev() {
75-
let sequence = sequences[sequence];
76-
let (ll_code, ll_add_bits, ll_num_bits) = encode_literal_length(sequence.ll);
77-
let (of_code, of_add_bits, of_num_bits) = encode_offset(sequence.of);
78-
let (ml_code, ml_add_bits, ml_num_bits) = encode_match_len(sequence.ml);
79-
80-
{
81-
let next = of_table.next_state(of_code, of_state.index);
82-
let diff = of_state.index - next.baseline;
83-
writer.write_bits(diff as u64, next.num_bits as usize);
84-
of_state = next;
85-
}
86-
{
87-
let next = ml_table.next_state(ml_code, ml_state.index);
88-
let diff = ml_state.index - next.baseline;
89-
writer.write_bits(diff as u64, next.num_bits as usize);
90-
ml_state = next;
91-
}
92-
{
93-
let next = ll_table.next_state(ll_code, ll_state.index);
94-
let diff = ll_state.index - next.baseline;
95-
writer.write_bits(diff as u64, next.num_bits as usize);
96-
ll_state = next;
97-
}
98-
99-
writer.write_bits(ll_add_bits, ll_num_bits);
100-
writer.write_bits(ml_add_bits, ml_num_bits);
101-
writer.write_bits(of_add_bits, of_num_bits);
102-
}
103-
}
104-
writer.write_bits(ml_state.index as u64, ml_table.table_size.ilog2() as usize);
105-
writer.write_bits(of_state.index as u64, of_table.table_size.ilog2() as usize);
106-
writer.write_bits(ll_state.index as u64, ll_table.table_size.ilog2() as usize);
74+
writer.write_bits(encode_fse_table_modes(&ll_mode, &ml_mode, &of_mode), 8);
10775

108-
let bits_to_fill = writer.misaligned();
109-
if bits_to_fill == 0 {
110-
writer.write_bits(1u32, 8);
111-
} else {
112-
writer.write_bits(1u32, bits_to_fill);
76+
encode_table(&ll_mode, &mut writer);
77+
encode_table(&of_mode, &mut writer);
78+
encode_table(&ml_mode, &mut writer);
79+
80+
encode_sequences(
81+
&sequences,
82+
&mut writer,
83+
ll_mode.as_ref(),
84+
ml_mode.as_ref(),
85+
of_mode.as_ref(),
86+
);
87+
88+
if let FseTableMode::Encoded(table) = ll_mode {
89+
state.fse_tables.ll_previous = Some(table)
90+
}
91+
if let FseTableMode::Encoded(table) = ml_mode {
92+
state.fse_tables.ml_previous = Some(table)
93+
}
94+
if let FseTableMode::Encoded(table) = of_mode {
95+
state.fse_tables.of_previous = Some(table)
11396
}
11497
}
11598
writer.flush();
11699
}
117100

101+
#[derive(Clone)]
102+
#[allow(clippy::large_enum_variant)]
103+
enum FseTableMode<'a> {
104+
Predefined(&'a FSETable),
105+
Encoded(FSETable),
106+
RepeateLast(&'a FSETable),
107+
}
108+
109+
impl FseTableMode<'_> {
110+
pub fn as_ref(&self) -> &FSETable {
111+
match self {
112+
Self::Predefined(t) => t,
113+
Self::RepeateLast(t) => t,
114+
Self::Encoded(t) => t,
115+
}
116+
}
117+
}
118+
119+
fn choose_table<'a>(
120+
previous: Option<&'a FSETable>,
121+
default_table: &'a FSETable,
122+
data: impl Iterator<Item = u8>,
123+
max_log: u8,
124+
) -> FseTableMode<'a> {
125+
// TODO check if the new table is better than the predefined and previous table
126+
let use_new_table = true;
127+
let use_previous_table = false;
128+
if use_previous_table {
129+
FseTableMode::RepeateLast(previous.unwrap())
130+
} else if use_new_table {
131+
FseTableMode::Encoded(build_table_from_data(data, max_log, true))
132+
} else {
133+
FseTableMode::Predefined(default_table)
134+
}
135+
}
136+
137+
fn encode_table(mode: &FseTableMode<'_>, writer: &mut BitWriter<&mut Vec<u8>>) {
138+
match mode {
139+
FseTableMode::Predefined(_) => {}
140+
FseTableMode::RepeateLast(_) => {}
141+
FseTableMode::Encoded(table) => table.write_table(writer),
142+
}
143+
}
144+
145+
fn encode_fse_table_modes(
146+
ll_mode: &FseTableMode<'_>,
147+
ml_mode: &FseTableMode<'_>,
148+
of_mode: &FseTableMode<'_>,
149+
) -> u8 {
150+
fn mode_to_bits(mode: &FseTableMode<'_>) -> u8 {
151+
match mode {
152+
FseTableMode::Predefined(_) => 0,
153+
FseTableMode::Encoded(_) => 2,
154+
FseTableMode::RepeateLast(_) => 3,
155+
}
156+
}
157+
mode_to_bits(ll_mode) << 6 | mode_to_bits(of_mode) << 4 | mode_to_bits(ml_mode) << 2
158+
}
159+
160+
fn encode_sequences(
161+
sequences: &[crate::blocks::sequence_section::Sequence],
162+
writer: &mut BitWriter<&mut Vec<u8>>,
163+
ll_table: &FSETable,
164+
ml_table: &FSETable,
165+
of_table: &FSETable,
166+
) {
167+
let sequence = sequences[sequences.len() - 1];
168+
let (ll_code, ll_add_bits, ll_num_bits) = encode_literal_length(sequence.ll);
169+
let (of_code, of_add_bits, of_num_bits) = encode_offset(sequence.of);
170+
let (ml_code, ml_add_bits, ml_num_bits) = encode_match_len(sequence.ml);
171+
let mut ll_state: &State = ll_table.start_state(ll_code);
172+
let mut ml_state: &State = ml_table.start_state(ml_code);
173+
let mut of_state: &State = of_table.start_state(of_code);
174+
175+
writer.write_bits(ll_add_bits, ll_num_bits);
176+
writer.write_bits(ml_add_bits, ml_num_bits);
177+
writer.write_bits(of_add_bits, of_num_bits);
178+
179+
// encode backwards so the decoder reads the first sequence first
180+
if sequences.len() > 1 {
181+
for sequence in (0..=sequences.len() - 2).rev() {
182+
let sequence = sequences[sequence];
183+
let (ll_code, ll_add_bits, ll_num_bits) = encode_literal_length(sequence.ll);
184+
let (of_code, of_add_bits, of_num_bits) = encode_offset(sequence.of);
185+
let (ml_code, ml_add_bits, ml_num_bits) = encode_match_len(sequence.ml);
186+
187+
{
188+
let next = of_table.next_state(of_code, of_state.index);
189+
let diff = of_state.index - next.baseline;
190+
writer.write_bits(diff as u64, next.num_bits as usize);
191+
of_state = next;
192+
}
193+
{
194+
let next = ml_table.next_state(ml_code, ml_state.index);
195+
let diff = ml_state.index - next.baseline;
196+
writer.write_bits(diff as u64, next.num_bits as usize);
197+
ml_state = next;
198+
}
199+
{
200+
let next = ll_table.next_state(ll_code, ll_state.index);
201+
let diff = ll_state.index - next.baseline;
202+
writer.write_bits(diff as u64, next.num_bits as usize);
203+
ll_state = next;
204+
}
205+
206+
writer.write_bits(ll_add_bits, ll_num_bits);
207+
writer.write_bits(ml_add_bits, ml_num_bits);
208+
writer.write_bits(of_add_bits, of_num_bits);
209+
}
210+
}
211+
writer.write_bits(ml_state.index as u64, ml_table.table_size.ilog2() as usize);
212+
writer.write_bits(of_state.index as u64, of_table.table_size.ilog2() as usize);
213+
writer.write_bits(ll_state.index as u64, ll_table.table_size.ilog2() as usize);
214+
215+
let bits_to_fill = writer.misaligned();
216+
if bits_to_fill == 0 {
217+
writer.write_bits(1u32, 8);
218+
} else {
219+
writer.write_bits(1u32, bits_to_fill);
220+
}
221+
}
222+
118223
fn encode_seqnum(seqnum: usize, writer: &mut BitWriter<impl AsMut<Vec<u8>>>) {
119224
const UPPER_LIMIT: usize = 0xFFFF + 0x7F00;
120225
match seqnum {

src/encoding/frame_compressor.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use super::{
1212
block_header::BlockHeader, frame_header::FrameHeader, levels::*,
1313
match_generator::MatchGeneratorDriver, CompressionLevel, Matcher,
1414
};
15+
use crate::fse::fse_encoder::{default_ll_table, default_ml_table, default_of_table, FSETable};
1516

1617
use crate::io::{Read, Write};
1718

@@ -43,9 +44,32 @@ pub struct FrameCompressor<R: Read, W: Write, M: Matcher> {
4344
hasher: XxHash64,
4445
}
4546

47+
pub(crate) struct FseTables {
48+
pub(crate) ll_default: FSETable,
49+
pub(crate) ll_previous: Option<FSETable>,
50+
pub(crate) ml_default: FSETable,
51+
pub(crate) ml_previous: Option<FSETable>,
52+
pub(crate) of_default: FSETable,
53+
pub(crate) of_previous: Option<FSETable>,
54+
}
55+
56+
impl FseTables {
57+
pub fn new() -> Self {
58+
Self {
59+
ll_default: default_ll_table(),
60+
ll_previous: None,
61+
ml_default: default_ml_table(),
62+
ml_previous: None,
63+
of_default: default_of_table(),
64+
of_previous: None,
65+
}
66+
}
67+
}
68+
4669
pub(crate) struct CompressState<M: Matcher> {
4770
pub(crate) matcher: M,
4871
pub(crate) last_huff_table: Option<crate::huff0::huff0_encoder::HuffmanTable>,
72+
pub(crate) fse_tables: FseTables,
4973
}
5074

5175
impl<R: Read, W: Write> FrameCompressor<R, W, MatchGeneratorDriver> {
@@ -58,6 +82,7 @@ impl<R: Read, W: Write> FrameCompressor<R, W, MatchGeneratorDriver> {
5882
state: CompressState {
5983
matcher: MatchGeneratorDriver::new(1024 * 128, 1),
6084
last_huff_table: None,
85+
fse_tables: FseTables::new(),
6186
},
6287
#[cfg(feature = "hash")]
6388
hasher: XxHash64::with_seed(0),
@@ -74,6 +99,7 @@ impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
7499
state: CompressState {
75100
matcher,
76101
last_huff_table: None,
102+
fse_tables: FseTables::new(),
77103
},
78104
compression_level,
79105
#[cfg(feature = "hash")]

0 commit comments

Comments
 (0)