Skip to content

Commit a0e5b81

Browse files
ros-crfolkertdev
authored and committed
Rework fuzz tests
Work done as part of an NLnet Security Evaluation for NLnet NGI Zero Core. Fix bugs in decompression and output buffer handling, extend differential fuzzing design, fuzz more compression parameters, remove now-unnecessary end-to-end test case, refactor and clean up code, add extensive code documentation to outline expectations and reasoning, rename fuzz tests, rename and disable corpus rejection feature by default, add README with fuzz resources, update lockfile.
1 parent e2f271b commit a0e5b81

File tree

11 files changed

+397
-286
lines changed

11 files changed

+397
-286
lines changed

.github/workflows/checks.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,9 +234,9 @@ jobs:
234234
strategy:
235235
matrix:
236236
include:
237-
- fuzz_target: decompress
237+
- fuzz_target: decompress_chunked
238238
corpus: "bzip2-files/compressed"
239-
features: '--no-default-features --features="disable-checksum,keep-invalid-in-corpus"'
239+
features: '--no-default-features --features="disable-checksum"'
240240
- fuzz_target: compress
241241
corpus: ""
242242
features: ''

Cargo.lock

Lines changed: 2 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

fuzz/Cargo.toml

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ default = ["rust-allocator"]
1616
c-allocator = ["libbz2-rs-sys/c-allocator"]
1717
rust-allocator = ["libbz2-rs-sys/rust-allocator"]
1818
disable-checksum = ["libbz2-rs-sys/__internal-fuzz-disable-checksum"]
19-
keep-invalid-in-corpus = [] # For code coverage (on CI), we want to keep inputs that triggered the error branches
19+
# actively reject and ignore invalid fuzz inputs during processing
20+
# this can have negative effects
21+
reject-invalid-in-corpus = []
2022

2123
[dependencies.libfuzzer-sys]
2224
version = "0.4"
@@ -43,31 +45,25 @@ default-features = false
4345
members = ["."]
4446

4547
[[bin]]
46-
name = "decompress"
47-
path = "fuzz_targets/decompress.rs"
48-
test = false
49-
doc = false
50-
51-
[[bin]]
52-
name = "decompress_random_input"
53-
path = "fuzz_targets/decompress_random_input.rs"
48+
name = "decompress_chunked"
49+
path = "fuzz_targets/decompress_chunked.rs"
5450
test = false
5551
doc = false
5652

5753
[[bin]]
58-
name = "decompress_chunked"
59-
path = "fuzz_targets/decompress_chunked.rs"
54+
name = "decompress"
55+
path = "fuzz_targets/decompress.rs"
6056
test = false
6157
doc = false
6258

6359
[[bin]]
64-
name = "compress"
65-
path = "fuzz_targets/compress.rs"
60+
name = "compress_then_decompress_chunked"
61+
path = "fuzz_targets/compress_then_decompress_chunked.rs"
6662
test = false
6763
doc = false
6864

6965
[[bin]]
70-
name = "end_to_end"
71-
path = "fuzz_targets/end_to_end.rs"
66+
name = "compress_then_decompress"
67+
path = "fuzz_targets/compress_then_decompress.rs"
7268
test = false
73-
doc = false
69+
doc = false

fuzz/README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Fuzz
2+
3+
## Seed corpus
4+
5+
* https://github.com/trifectatechfoundation/compression-corpus
6+
* https://gitlab.com/bzip2/bzip2-testfiles
7+
* See the GitHub workflow definitions for more information on seed corpus usage
8+
9+
## Fuzzer dictionary
10+
11+
* There is an existing bzip2 format dictionary: https://github.com/google/fuzzing/blob/master/dictionaries/bz2.dict
12+
* This could be useful for fuzz tests which consume compressed input and attempt to decompress it
13+
* However, there are only very few common input chunks that bzip2 streams share with each other, so the practical benefits of running the fuzzer with this dictionary are likely limited
14+
* See https://llvm.org/docs/LibFuzzer.html#dictionaries for more background

fuzz/fuzz_targets/compress.rs

Lines changed: 0 additions & 33 deletions
This file was deleted.
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#![no_main]
2+
use libbz2_rs_sys::BZ_OK;
3+
use libfuzzer_sys::fuzz_target;
4+
5+
fuzz_target!(|input: (&[u8], u8)| {
6+
let (fuzzed_data, compression_decider) = input;
7+
8+
// let the fuzzer pick a value from 1 to 9 (inclusive)
9+
// use modulo to ensure this always maps to a valid number
10+
let compression_level: u8 = (compression_decider % 9) + 1;
11+
12+
// compress the fuzzer-controlled data via the Rust implementation
13+
let (error, deflated) = unsafe {
14+
test_libbz2_rs_sys::compress_rs_with_capacity(
15+
4096,
16+
fuzzed_data.as_ptr().cast(),
17+
fuzzed_data.len() as _,
18+
compression_level.into(),
19+
)
20+
};
21+
22+
// compress the fuzzer-controlled data via the C implementation
23+
let (error_c, deflated_c) = unsafe {
24+
test_libbz2_rs_sys::compress_c_with_capacity(
25+
4096,
26+
fuzzed_data.as_ptr().cast(),
27+
fuzzed_data.len() as _,
28+
compression_level.into(),
29+
)
30+
};
31+
32+
// differential testing: ensure both implementations behave identically
33+
assert_eq!(error, error_c);
34+
assert_eq!(deflated, deflated_c);
35+
36+
// this compression step should always succeed
37+
assert_eq!(error, BZ_OK);
38+
39+
// decompress the previously compressed data again to test round-trip behavior
40+
let (error, decompressed_output) = unsafe {
41+
test_libbz2_rs_sys::decompress_rs_with_capacity(
42+
1 << 10,
43+
deflated.as_ptr(),
44+
deflated.len() as _,
45+
)
46+
};
47+
// this decompression of valid compressed data should always succeed
48+
assert_eq!(error, BZ_OK);
49+
50+
// after the round trip through compression + decompression, the result data
51+
// should be identical to the initial data
52+
assert_eq!(decompressed_output, fuzzed_data);
53+
});
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
#![no_main]
2+
use libbz2_rs_sys::bz_stream;
3+
use libbz2_rs_sys::BZ2_bzDecompress;
4+
use libbz2_rs_sys::BZ2_bzDecompressEnd;
5+
use libbz2_rs_sys::BZ2_bzDecompressInit;
6+
use libbz2_rs_sys::{
7+
BZ_CONFIG_ERROR, BZ_DATA_ERROR, BZ_DATA_ERROR_MAGIC, BZ_FINISH, BZ_FINISH_OK, BZ_FLUSH_OK,
8+
BZ_IO_ERROR, BZ_MEM_ERROR, BZ_OK, BZ_OUTBUFF_FULL, BZ_PARAM_ERROR, BZ_RUN_OK,
9+
BZ_SEQUENCE_ERROR, BZ_STREAM_END, BZ_UNEXPECTED_EOF,
10+
};
11+
12+
use libfuzzer_sys::fuzz_target;
13+
14+
fn compress_c(data: &[u8], compression_level: u8, work_factor: u8) -> Vec<u8> {
15+
// compress the data with the stock C bzip2
16+
17+
// output buffer for compression, will get resized later if needed
18+
let mut output = vec![0u8; 1024];
19+
20+
let mut stream = libbz2_rs_sys::bz_stream {
21+
next_in: data.as_ptr() as *mut _,
22+
avail_in: data.len() as _,
23+
total_in_lo32: 0,
24+
total_in_hi32: 0,
25+
next_out: output.as_mut_ptr() as *mut _,
26+
avail_out: output.len() as _,
27+
total_out_lo32: 0,
28+
total_out_hi32: 0,
29+
state: std::ptr::null_mut(),
30+
bzalloc: None,
31+
bzfree: None,
32+
opaque: std::ptr::null_mut(),
33+
};
34+
35+
unsafe {
36+
let err = libbz2_rs_sys::BZ2_bzCompressInit(
37+
&mut stream,
38+
compression_level.into(),
39+
0,
40+
work_factor.into(),
41+
);
42+
// init should always succeed
43+
assert_eq!(err, BZ_OK);
44+
};
45+
46+
let error = loop {
47+
match unsafe { libbz2_rs_sys::BZ2_bzCompress(&mut stream, BZ_FINISH) } {
48+
BZ_FINISH_OK => {
49+
let used = output.len() - stream.avail_out as usize;
50+
// The output buffer is full, resize it
51+
let add_space: u32 = Ord::max(1024, output.len().try_into().unwrap());
52+
output.resize(output.len() + add_space as usize, 0);
53+
54+
// If resize() reallocates, it may have moved in memory
55+
stream.next_out = output.as_mut_ptr().cast::<i8>().wrapping_add(used);
56+
stream.avail_out += add_space;
57+
58+
continue;
59+
}
60+
BZ_STREAM_END => {
61+
break BZ_OK;
62+
}
63+
ret => {
64+
break ret;
65+
}
66+
}
67+
};
68+
69+
// compression should always succeed
70+
assert_eq!(error, BZ_OK);
71+
72+
// truncate the output buffer down to the actual number of compressed bytes
73+
output.truncate(
74+
((u64::from(stream.total_out_hi32) << 32) + u64::from(stream.total_out_lo32))
75+
.try_into()
76+
.unwrap(),
77+
);
78+
79+
unsafe {
80+
// cleanup, should always succeed
81+
let err = libbz2_rs_sys::BZ2_bzCompressEnd(&mut stream);
82+
assert_eq!(err, BZ_OK);
83+
}
84+
85+
output
86+
}
87+
88+
fuzz_target!(|input: (&[u8], usize, u8, u8)| {
    let (source, chunk_len, level_seed, factor_seed) = input;

    // chunks() cannot iterate over zero-sized chunks, so bail out early
    if chunk_len == 0 {
        return;
    }

    // map the seed byte onto the valid compression levels 1..=9
    // (modulo keeps every possible byte value in range)
    let level: u8 = (level_seed % 9) + 1;

    // map the seed byte onto the valid work factors 0..=250
    let factor = factor_seed % 251;

    // produce a valid bzip2 stream to feed to the chunked decompressor
    let compressed = compress_c(source, level, factor);

    let mut strm = bz_stream::zeroed();

    unsafe {
        // initialization is expected to succeed unconditionally
        let err = BZ2_bzDecompressInit(&mut strm, 0, 0);
        assert_eq!(err, BZ_OK);
    };

    // decompression output; grown on demand below
    let mut inflated = vec![0u8; 1 << 10];
    strm.next_out = inflated.as_mut_ptr() as *mut _;
    strm.avail_out = inflated.len() as _;

    // feed the compressed stream to the decompressor in fuzzer-sized pieces
    'chunks: for piece in compressed.as_slice().chunks(chunk_len) {
        // designate the current piece as input
        strm.next_in = piece.as_ptr() as *mut _;
        strm.avail_in = piece.len() as _;

        loop {
            // perform one round of decompression
            let err = unsafe { BZ2_bzDecompress(&mut strm) };
            match err {
                // the whole stream has been decoded; leave both loops
                BZ_STREAM_END => break 'chunks,
                // input remains but the output buffer is exhausted: grow it
                // (at least 1024 bytes, or doubling its current size)
                BZ_OK if strm.avail_in > 0 && strm.avail_out == 0 => {
                    let filled = inflated.len() - strm.avail_out as usize;
                    let extra: u32 = Ord::max(1024, inflated.len().try_into().unwrap());
                    inflated.resize(inflated.len() + extra as usize, 0);

                    // resize() may have reallocated and moved the buffer,
                    // so recompute the write cursor from the new base pointer
                    strm.next_out = inflated.as_mut_ptr().cast::<i8>().wrapping_add(filled);
                    strm.avail_out += extra;
                }
                // this piece was handled successfully; fetch the next one
                BZ_OK => break,
                // any other status is unexpected for valid compressed input
                BZ_RUN_OK => panic!("BZ_RUN_OK"),
                BZ_FLUSH_OK => panic!("BZ_FLUSH_OK"),
                BZ_FINISH_OK => panic!("BZ_FINISH_OK"),
                BZ_SEQUENCE_ERROR => panic!("BZ_SEQUENCE_ERROR"),
                BZ_PARAM_ERROR => panic!("BZ_PARAM_ERROR"),
                BZ_MEM_ERROR => panic!("BZ_MEM_ERROR"),
                BZ_DATA_ERROR => panic!("BZ_DATA_ERROR"),
                BZ_DATA_ERROR_MAGIC => panic!("BZ_DATA_ERROR_MAGIC"),
                BZ_IO_ERROR => panic!("BZ_IO_ERROR"),
                BZ_UNEXPECTED_EOF => panic!("BZ_UNEXPECTED_EOF"),
                BZ_OUTBUFF_FULL => panic!("BZ_OUTBUFF_FULL"),
                BZ_CONFIG_ERROR => panic!("BZ_CONFIG_ERROR"),
                _ => panic!("{err}"),
            }
        }
    }

    // shrink the output down to the number of bytes actually produced
    inflated.truncate(
        ((u64::from(strm.total_out_hi32) << 32) + u64::from(strm.total_out_lo32))
            .try_into()
            .unwrap(),
    );

    unsafe {
        // cleanup is expected to succeed unconditionally
        let err = BZ2_bzDecompressEnd(&mut strm);
        assert_eq!(err, BZ_OK);
    }

    // round-trip check: compressing and then decompressing in chunks
    // must reproduce the original input exactly
    assert_eq!(inflated, source);
});

0 commit comments

Comments
 (0)