From 48cf2c5fd41e7423c6deab064ab4fb9c3f24b0e6 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 3 Oct 2024 10:42:43 +0200 Subject: [PATCH 1/6] add some buffer in the window so SIMD reads from it are always allowed --- test-libz-rs-sys/src/inflate.rs | 59 ++++++--------------------------- zlib-rs/src/inflate/window.rs | 10 ++++-- 2 files changed, 19 insertions(+), 50 deletions(-) diff --git a/test-libz-rs-sys/src/inflate.rs b/test-libz-rs-sys/src/inflate.rs index 4cda6383..beea4fa8 100644 --- a/test-libz-rs-sys/src/inflate.rs +++ b/test-libz-rs-sys/src/inflate.rs @@ -6,6 +6,8 @@ use zlib_rs::deflate::{compress_slice, DeflateConfig}; use zlib_rs::inflate::{set_mode_dict, uncompress_slice, InflateConfig, INFLATE_STATE_SIZE}; use zlib_rs::{InflateFlush, ReturnCode, MAX_WBITS}; +use crate::assert_eq_rs_ng; + const VERSION: *const c_char = libz_rs_sys::zlibVersion(); const STREAM_SIZE: c_int = core::mem::size_of::() as c_int; @@ -1030,7 +1032,7 @@ fn hello_world_uncompressed() { #[test] fn copy_direct_from_next_in_to_next_out() { - crate::assert_eq_rs_ng!({ + assert_eq_rs_ng!({ let input = [120, 1, 1, 2, 0, 253, 255, 6, 10, 0, 24, 0, 17]; let mut dest_vec = vec![0u8; 1 << 16]; @@ -1564,12 +1566,10 @@ fn window_match_bug() { let window_bits = -10; - let mut output_rs: Vec = Vec::with_capacity(1 << 15); - let mut output_ng: Vec = Vec::with_capacity(1 << 15); - - let mut buf = [0; 402]; + assert_eq_rs_ng!({ + let mut output: Vec = Vec::with_capacity(1 << 15); + let mut buf = [0; 402]; - { let mut stream = MaybeUninit::::zeroed(); let err = unsafe { @@ -1593,7 +1593,7 @@ fn window_match_bug() { let err = unsafe { inflate(stream, InflateFlush::Finish as _) }; - output_rs.extend(&buf[..buf.len() - stream.avail_out as usize]); + output.extend(&buf[..buf.len() - stream.avail_out as usize]); match ReturnCode::from(err) { ReturnCode::BufError => { @@ -1604,48 +1604,11 @@ fn window_match_bug() { other => panic!("unexpected {:?}", other), } } - } - { - use libz_sys::*; - - let mut stream = MaybeUninit::::zeroed(); + inflateEnd(stream); - let err = unsafe { - inflateInit2_( - stream.as_mut_ptr(), - window_bits, - zlibVersion(), - core::mem::size_of::() as c_int, - ) - }; - assert_eq!(ReturnCode::from(err), ReturnCode::Ok); - - let stream = unsafe { stream.assume_init_mut() }; - - stream.next_in = input.as_ptr() as *mut u8; - stream.avail_in = input.len() as _; - - while stream.avail_in != 0 { - stream.next_out = buf.as_mut_ptr(); - stream.avail_out = buf.len() as _; - - let err = unsafe { inflate(stream, InflateFlush::Finish as _) }; - - output_ng.extend(&buf[..buf.len() - stream.avail_out as usize]); - - match ReturnCode::from(err) { - ReturnCode::BufError => { - assert_eq!(stream.avail_out, 0); - stream.avail_out = buf.len() as _; - } - ReturnCode::StreamEnd => break, - other => panic!("unexpected {:?}", other), - } - } - } - - assert_eq!(output_rs, output_ng); + output + }); } #[test] @@ -1847,7 +1810,7 @@ fn issue_172() { 45, 1, 0, 176, 1, 57, 179, 15, 0, 0, 0, ]; - crate::assert_eq_rs_ng!({ + assert_eq_rs_ng!({ let mut exitcode = 0; for chunk in 1..BUF.len() { let mut ret; diff --git a/zlib-rs/src/inflate/window.rs b/zlib-rs/src/inflate/window.rs index 7360e358..25c06441 100644 --- a/zlib-rs/src/inflate/window.rs +++ b/zlib-rs/src/inflate/window.rs @@ -121,12 +121,18 @@ impl<'a> Window<'a> { } } else { let end_part = unsafe { slice_to_uninit(end_part) }; + self.buf[self.next..][..end_part.len()].copy_from_slice(end_part); + + // Initialize extra bytes so that SIMD reads from the window are always allowd. + // This is important for `Writer::extend_from_window`. + self.buf[self.next + end_part.len()..][..Self::padding()].fill(MaybeUninit::new(0)); } if !start_part.is_empty() { + let dst = &mut self.buf[..start_part.len()]; + if update_checksum { - let dst = &mut self.buf[..start_part.len()]; if flags != 0 { crc_fold.fold_copy(dst, start_part); } else { @@ -134,7 +140,7 @@ impl<'a> Window<'a> { } } else { let start_part = unsafe { slice_to_uninit(start_part) }; - self.buf[..start_part.len()].copy_from_slice(start_part); + dst.copy_from_slice(start_part); } self.next = start_part.len(); From 0306b20afb6ac9b5fce55f0440c0e8ef0637198e Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 3 Oct 2024 11:45:31 +0200 Subject: [PATCH 2/6] fix a number of miri bugs (mostly leaking memory) --- test-libz-rs-sys/src/inflate.rs | 129 +++++++++---------------------- zlib-rs/src/inflate.rs | 8 +- zlib-rs/src/inflate/bitreader.rs | 5 ++ zlib-rs/src/inflate/writer.rs | 1 + 4 files changed, 48 insertions(+), 95 deletions(-) diff --git a/test-libz-rs-sys/src/inflate.rs b/test-libz-rs-sys/src/inflate.rs index beea4fa8..14fadb87 100644 --- a/test-libz-rs-sys/src/inflate.rs +++ b/test-libz-rs-sys/src/inflate.rs @@ -205,9 +205,9 @@ fn gzip_header_check() { } let mut out = vec![0u8; len]; - let extra: [u8; 14] = [0; 14]; - let name: [u8; 9] = [0; 9]; - let comment: [u8; 10] = [0; 10]; + let mut extra: [u8; 14] = [0; 14]; + let mut name: [u8; 9] = [0; 9]; + let mut comment: [u8; 10] = [0; 10]; // Set header // See: https://www.zlib.net/manual.html @@ -216,12 +216,12 @@ fn gzip_header_check() { time: 0, xflags: 0, os: 0, - extra: extra.as_ptr() as *mut u8, + extra: extra.as_mut_ptr(), extra_len: 0, extra_max: 14, - name: name.as_ptr() as *mut u8, + name: name.as_mut_ptr(), name_max: 9, // How / where should this be set? - comment: comment.as_ptr() as *mut u8, + comment: comment.as_mut_ptr(), comm_max: 10, hcrc: 0, done: 0, @@ -293,9 +293,9 @@ fn inf(input: &[u8], _what: &str, step: usize, win: i32, len: usize, err: c_int) let mut out = vec![0u8; len]; - let extra: [u8; 1024] = [0; 1024]; - let name: [u8; 64] = [0; 64]; - let comment: [u8; 64] = [0; 64]; + let mut extra: [u8; 1024] = [0; 1024]; + let mut name: [u8; 64] = [0; 64]; + let mut comment: [u8; 64] = [0; 64]; // Set header // See: https://www.zlib.net/manual.html @@ -304,12 +304,12 @@ fn inf(input: &[u8], _what: &str, step: usize, win: i32, len: usize, err: c_int) time: 0, xflags: 0, os: 0, - extra: extra.as_ptr() as *mut u8, + extra: extra.as_mut_ptr(), extra_len: 0, extra_max: 1024, - name: name.as_ptr() as *mut u8, + name: name.as_mut_ptr(), name_max: 64, // How / where should this be set? - comment: comment.as_ptr() as *mut u8, + comment: comment.as_mut_ptr(), comm_max: 64, hcrc: 0, done: 0, @@ -638,6 +638,7 @@ fn try_inflate(input: &[u8], err: c_int) -> c_int { /* allocate work areas */ let size = len << 3; let mut out = vec![0; size]; + dbg!(out.as_slice().as_ptr_range()); // let mut win = vec![0; 32768]; // /* first with inflate */ @@ -1145,6 +1146,11 @@ fn inflate_get_header_non_gzip_stream() { unsafe { inflateGetHeader(&mut stream, &mut header) }, ReturnCode::StreamError as i32 ); + + let ret = unsafe { inflateEnd(&mut stream) }; + assert_eq!(ret, Z_OK); + + mem_done(&mut stream); } #[test] @@ -1426,6 +1432,8 @@ fn chunked_output_rs() { let err = unsafe { inflate(stream, InflateFlush::Finish as _) }; assert_eq!(ReturnCode::from(err), ReturnCode::StreamEnd); + unsafe { inflateEnd(stream) }; + assert_eq!(stream.total_out, 33); } @@ -1482,14 +1490,13 @@ fn version_error() { fn issue_109() { let input = &include_bytes!("test-data/issue-109.gz")[10..][..32758]; - let mut output_rs: Vec = Vec::with_capacity(1 << 15); - let mut output_ng: Vec = Vec::with_capacity(1 << 15); - let window_bits = -15; - let mut buf = [0; 8192]; + assert_eq_rs_ng!({ + let mut output: Vec = Vec::with_capacity(1 << 15); + + let mut buf = [0; 8192]; - { let mut stream = MaybeUninit::::zeroed(); let err = unsafe { @@ -1514,48 +1521,17 @@ fn issue_109() { let err = unsafe { inflate(stream, InflateFlush::NoFlush as _) }; if ReturnCode::from(err) == ReturnCode::BufError { - output_rs.extend(&buf[..stream.avail_out as usize]); + output.extend(&buf[..stream.avail_out as usize]); stream.avail_out = buf.len() as _; continue; } } - } - - { - use libz_sys::*; - - let mut stream = MaybeUninit::::zeroed(); - let err = unsafe { - inflateInit2_( - stream.as_mut_ptr(), - window_bits, - zlibVersion(), - core::mem::size_of::() as c_int, - ) - }; + let err = inflateEnd(stream); assert_eq!(ReturnCode::from(err), ReturnCode::Ok); - let stream = unsafe { stream.assume_init_mut() }; - - stream.next_in = input.as_ptr() as *mut u8; - stream.avail_in = input.len() as _; - - while stream.avail_in != 0 { - stream.next_out = buf.as_mut_ptr(); - stream.avail_out = buf.len() as _; - - let err = unsafe { inflate(stream, InflateFlush::NoFlush as _) }; - - if ReturnCode::from(err) == ReturnCode::BufError { - output_ng.extend(&buf[..stream.avail_out as usize]); - stream.avail_out = buf.len() as _; - continue; - } - } - } - - assert_eq!(output_rs, output_ng); + output + }); } #[test] @@ -1617,12 +1593,11 @@ fn op_len_edge_case() { let input = &include_bytes!("test-data/op-len-edge-case.zraw"); - let mut output_rs: Vec = Vec::with_capacity(1 << 15); - let mut output_ng: Vec = Vec::with_capacity(1 << 15); + assert_eq_rs_ng!({ + let mut output: Vec = Vec::with_capacity(1 << 15); - let mut buf = [0; 266]; + let mut buf = [0; 266]; - { let mut stream = MaybeUninit::::zeroed(); let err = unsafe { @@ -1647,48 +1622,17 @@ fn op_len_edge_case() { let err = unsafe { inflate(stream, InflateFlush::NoFlush as _) }; if ReturnCode::from(err) == ReturnCode::BufError { - output_rs.extend(&buf[..stream.avail_out as usize]); + output.extend(&buf[..stream.avail_out as usize]); stream.avail_out = buf.len() as _; continue; } } - } - - { - use libz_sys::*; - - let mut stream = MaybeUninit::::zeroed(); - let err = unsafe { - inflateInit2_( - stream.as_mut_ptr(), - window_bits, - zlibVersion(), - core::mem::size_of::() as c_int, - ) - }; + let err = inflateEnd(stream); assert_eq!(ReturnCode::from(err), ReturnCode::Ok); - let stream = unsafe { stream.assume_init_mut() }; - - stream.next_in = input.as_ptr() as *mut u8; - stream.avail_in = input.len() as _; - - while stream.avail_in != 0 { - stream.next_out = buf.as_mut_ptr(); - stream.avail_out = buf.len() as _; - - let err = unsafe { inflate(stream, InflateFlush::NoFlush as _) }; - - if ReturnCode::from(err) == ReturnCode::BufError { - output_ng.extend(&buf[..stream.avail_out as usize]); - stream.avail_out = buf.len() as _; - continue; - } - } - } - - assert_eq!(output_rs, output_ng); + output + }); } // Fills the provided buffer with pseudorandom bytes based on the given seed @@ -1844,6 +1788,9 @@ fn issue_172() { eprintln!("Output did not match at chunk size {}\n", chunk); exitcode = 1; } + + let err = inflateEnd(strm); + assert_eq!(ReturnCode::from(err), ReturnCode::Ok); } assert!(exitcode == 0); diff --git a/zlib-rs/src/inflate.rs b/zlib-rs/src/inflate.rs index 7ca41f4d..dcabed1c 100644 --- a/zlib-rs/src/inflate.rs +++ b/zlib-rs/src/inflate.rs @@ -707,9 +707,8 @@ impl<'a> State<'a> { if (self.gzip_flags & 0x0400) != 0 { // self.length is the number of remaining `extra` bytes. But they may not all be available let extra_available = Ord::min(self.length, self.bit_reader.bytes_remaining()); - let extra_slice = &self.bit_reader.as_slice()[..extra_available]; - if !extra_slice.is_empty() { + if extra_available > 0 { if let Some(head) = self.head.as_mut() { if !head.extra.is_null() { // at `head.extra`, the caller has reserved `head.extra_max` bytes. @@ -723,7 +722,7 @@ impl<'a> State<'a> { // min of number of bytes available at dst and at src let count = Ord::min( (head.extra_max as usize).saturating_sub(written_so_far), - extra_slice.len(), + extra_available, ); // location where we'll write: this saturates at the @@ -732,7 +731,7 @@ impl<'a> State<'a> { unsafe { core::ptr::copy_nonoverlapping( - self.bit_reader.as_ptr(), + self.bit_reader.as_mut_ptr(), head.extra.add(next_write_offset), count, ); @@ -742,6 +741,7 @@ impl<'a> State<'a> { // Checksum if (self.gzip_flags & 0x0200) != 0 && (self.wrap & 4) != 0 { + let extra_slice = &self.bit_reader.as_slice()[..extra_available]; self.checksum = crc32(self.checksum, extra_slice) } diff --git a/zlib-rs/src/inflate/bitreader.rs b/zlib-rs/src/inflate/bitreader.rs index b9657865..98139ac9 100644 --- a/zlib-rs/src/inflate/bitreader.rs +++ b/zlib-rs/src/inflate/bitreader.rs @@ -47,6 +47,11 @@ impl<'a> BitReader<'a> { self.ptr } + #[inline(always)] + pub fn as_mut_ptr(&mut self) -> *mut u8 { + self.ptr as *mut u8 + } + #[inline(always)] pub fn as_slice(&self) -> &[u8] { let len = self.bytes_remaining(); diff --git a/zlib-rs/src/inflate/writer.rs b/zlib-rs/src/inflate/writer.rs index 7219b17e..f6f1ab3d 100644 --- a/zlib-rs/src/inflate/writer.rs +++ b/zlib-rs/src/inflate/writer.rs @@ -107,6 +107,7 @@ impl<'a> Writer<'a> { range: Range, ) { let len = range.end - range.start; + if self.remaining() >= core::mem::size_of::() { // Safety: we know that our window has at least a core::mem::size_of::() extra bytes // at the end, making it always safe to perform an (unaligned) Chunk read anywhere in From 01e3f9aa099660abe47afb50191a14cb6dff0405 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 3 Oct 2024 11:47:27 +0200 Subject: [PATCH 3/6] fix out of bounds write --- zlib-rs/src/inflate/writer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zlib-rs/src/inflate/writer.rs b/zlib-rs/src/inflate/writer.rs index f6f1ab3d..2046ab05 100644 --- a/zlib-rs/src/inflate/writer.rs +++ b/zlib-rs/src/inflate/writer.rs @@ -108,7 +108,7 @@ impl<'a> Writer<'a> { ) { let len = range.end - range.start; - if self.remaining() >= core::mem::size_of::() { + if self.remaining() >= len + core::mem::size_of::() { // Safety: we know that our window has at least a core::mem::size_of::() extra bytes // at the end, making it always safe to perform an (unaligned) Chunk read anywhere in // the window slice. From a0b333ed632fd9270cb293fc16d398dac3fa4ebd Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 3 Oct 2024 13:31:50 +0200 Subject: [PATCH 4/6] fix memory leak in test --- test-libz-rs-sys/src/inflate.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test-libz-rs-sys/src/inflate.rs b/test-libz-rs-sys/src/inflate.rs index 14fadb87..00cc36ed 100644 --- a/test-libz-rs-sys/src/inflate.rs +++ b/test-libz-rs-sys/src/inflate.rs @@ -1450,6 +1450,9 @@ fn version_error() { }; assert_eq!(ret, Z_OK); + let ret = unsafe { inflateEnd(stream.as_mut_ptr()) }; + assert_eq!(ret, Z_OK); + // invalid stream size let ret = unsafe { inflateInit_(stream.as_mut_ptr(), zlibVersion(), 1) }; assert_eq!(ret, Z_VERSION_ERROR); From 34af1d34e9c60e04d8c8ba6ec9d5ff8463f03568 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 3 Oct 2024 13:31:58 +0200 Subject: [PATCH 5/6] disable very slow test for miri --- test-libz-rs-sys/src/inflate.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/test-libz-rs-sys/src/inflate.rs b/test-libz-rs-sys/src/inflate.rs index 00cc36ed..8aaee522 100644 --- a/test-libz-rs-sys/src/inflate.rs +++ b/test-libz-rs-sys/src/inflate.rs @@ -1655,6 +1655,7 @@ fn prng_bytes(seed: u64, bytes: &mut [u8], step: usize) { } #[test] +#[cfg_attr(miri, ignore = "slow")] fn test_inflate_flush_block() { let window_bits = -15; // Raw const CHUNK: usize = 16384; From ef8910f2d7d004094194b157fee478d8bba14c1c Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 3 Oct 2024 13:48:28 +0200 Subject: [PATCH 6/6] add miri tests for supported targets --- .github/workflows/miri.yaml | 51 +++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 .github/workflows/miri.yaml diff --git a/.github/workflows/miri.yaml b/.github/workflows/miri.yaml new file mode 100644 index 00000000..e1abe038 --- /dev/null +++ b/.github/workflows/miri.yaml @@ -0,0 +1,51 @@ +name: CI + +on: + pull_request: + branches: + - main + +jobs: + miri: + name: "Miri" + runs-on: ubuntu-latest + + strategy: + matrix: + include: + - target: "x86_64-unknown-linux-gnu" + features: "" + - target: "x86_64-unknown-linux-gnu" + features: "+avx2,+pclmulqdq" + + - target: "aarch64-unknown-linux-gnu" + features: "" + - target: "aarch64-unknown-linux-gnu" + features: "+neon,+crc" + + - target: "wasm32-unknown-unknown" + features: "" + - target: "wasm32-unknown-unknown" + features: "+simd128" + + - target: "s390x-unknown-linux-gnu" + features: "" + + - target: "i686-unknown-linux-gnu" + features: "" + + steps: + - uses: actions/checkout@v3 + - name: Install Miri + run: | + rustup toolchain install nightly --component miri + cargo +nightly miri setup + - name: Install cargo-nextest + uses: taiki-e/install-action@56ab7930c591507f833cbaed864d201386d518a8 + with: + tool: cargo-nextest + - name: Test public C api with NULL arguments + run: "cargo +nightly miri nextest run -j4 -p test-libz-rs-sys" + env: + RUSTFLAGS: "-Ctarget-feature=${{ matrix.features }}" + MIRIFLAGS: "-Zmiri-tree-borrows"