From 9eb35c736aad183548f6f53d1060db537c693097 Mon Sep 17 00:00:00 2001 From: Andrew Liebenow Date: Tue, 24 Sep 2024 19:25:29 -0500 Subject: [PATCH] Fix two bugs. Add property testing. --- Cargo.lock | 105 ++++-- src/uu/base32/Cargo.toml | 3 + src/uu/base32/src/base_common.rs | 147 ++++---- src/uu/base32/tests/property_tests.rs | 430 ++++++++++++++++++++++++ src/uu/factor/Cargo.toml | 3 - src/uucore/src/lib/features/encoding.rs | 17 +- tests/by-util/test_basenc.rs | 31 +- 7 files changed, 608 insertions(+), 128 deletions(-) create mode 100644 src/uu/base32/tests/property_tests.rs diff --git a/Cargo.lock b/Cargo.lock index adec856f907..b3bb27bcca0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -185,6 +185,21 @@ dependencies = [ "syn 2.0.60", ] +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.3.2" @@ -850,16 +865,6 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" -[[package]] -name = "env_logger" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" -dependencies = [ - "log", - "regex", -] - [[package]] name = "equivalent" version = "1.0.1" @@ -1548,6 +1553,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -1791,22 +1797,37 @@ dependencies = [ "hex", ] +[[package]] +name = "proptest" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c2511913b88df1637da85cc8d96ec8e43a3f8bb8ccb71ee1ac240d6f3df58d" +dependencies = [ + "bit-set", + "bit-vec", + "bitflags 2.6.0", + "lazy_static", + "num-traits", + "rand", + "rand_chacha", + "rand_xorshift", + "regex-syntax", + "rusty-fork", + "tempfile", + "unarray", +] + [[package]] name = "quick-error" -version = "2.0.1" +version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] -name = "quickcheck" -version = "1.0.3" +name = "quick-error" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" -dependencies = [ - "env_logger", - "log", - "rand", -] +checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" [[package]] name = "quote" @@ -1862,6 +1883,15 @@ dependencies = [ "rand_core", ] +[[package]] +name = "rand_xorshift" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" +dependencies = [ + "rand_core", +] + [[package]] name = "rayon" version = "1.10.0" @@ -2030,6 +2060,18 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rusty-fork" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" +dependencies = [ + "fnv", + "quick-error 1.2.3", + "tempfile", + "wait-timeout", +] + [[package]] name = "same-file" version = "1.0.6" @@ -2396,6 +2438,12 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" +[[package]] +name = "unarray" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + [[package]] name = "unicode-ident" version = "1.0.5" @@ -2476,6 +2524,7 @@ name = "uu_base32" version = "0.0.27" dependencies = [ "clap", + "proptest", "uucore", ] @@ -2586,7 +2635,7 @@ dependencies = [ "filetime", "indicatif", "libc", - "quick-error", + "quick-error 2.0.1", "selinux", "uucore", "walkdir", @@ -2730,7 +2779,6 @@ dependencies = [ "num-bigint", "num-prime", "num-traits", - "quickcheck", "rand", "smallvec", "uucore", @@ -3035,7 +3083,7 @@ dependencies = [ "chrono", "clap", "itertools", - "quick-error", + "quick-error 2.0.1", "regex", "uucore", ] @@ -3533,6 +3581,15 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "wait-timeout" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" +dependencies = [ + "libc", +] + [[package]] name = "walkdir" version = "2.5.0" @@ -3634,7 +3691,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/src/uu/base32/Cargo.toml b/src/uu/base32/Cargo.toml index 152091aa3a6..88724360f7e 100644 --- a/src/uu/base32/Cargo.toml +++ b/src/uu/base32/Cargo.toml @@ -20,6 +20,9 @@ path = "src/base32.rs" clap = { workspace = true } uucore = { workspace = true, features = ["encoding"] } +[dev-dependencies] +proptest = "1.5.0" + [[bin]] name = "base32" path = "src/main.rs" diff --git a/src/uu/base32/src/base_common.rs b/src/uu/base32/src/base_common.rs index 5bee873dad7..9a56d131656 100644 --- a/src/uu/base32/src/base_common.rs +++ b/src/uu/base32/src/base_common.rs @@ -7,7 +7,7 @@ use clap::{crate_version, Arg, ArgAction, Command}; use std::fs::File; -use std::io::{ErrorKind, Read, Stdin}; +use std::io::{self, ErrorKind, Read, Stdin}; use std::path::Path; use uucore::display::Quotable; use uucore::encoding::{ @@ -25,7 +25,7 @@ pub const BASE_CMD_PARSE_ERROR: i32 = 1; /// Other implementations default to 76 /// /// This default is only used if no "-w"/"--wrap" argument is passed -const WRAP_DEFAULT: usize = 76; +pub const WRAP_DEFAULT: usize = 76; pub struct Config { pub decode: bool, @@ -158,6 +158,28 @@ pub fn handle_input( ignore_garbage: bool, decode: bool, ) -> UResult<()> { + let supports_fast_decode_and_encode = get_supports_fast_decode_and_encode(format); + + let mut stdout_lock = io::stdout().lock(); + + if decode { + fast_decode::fast_decode( + input, + &mut stdout_lock, + supports_fast_decode_and_encode.as_ref(), + ignore_garbage, + ) + } else { + fast_encode::fast_encode( + input, + &mut stdout_lock, + supports_fast_decode_and_encode.as_ref(), + wrap, + ) + } +} + +pub fn get_supports_fast_decode_and_encode(format: Format) -> Box { const BASE16_VALID_DECODING_MULTIPLE: usize = 2; const BASE2_VALID_DECODING_MULTIPLE: usize = 8; const BASE32_VALID_DECODING_MULTIPLE: usize = 8; @@ -168,7 +190,7 @@ pub fn handle_input( const BASE32_UNPADDED_MULTIPLE: usize = 5; const BASE64_UNPADDED_MULTIPLE: usize = 3; - let supports_fast_decode_and_encode: Box = match format { + match format { Format::Base16 => Box::from(EncodingWrapper::new( HEXUPPER, BASE16_VALID_DECODING_MULTIPLE, @@ -219,26 +241,14 @@ pub fn handle_input( b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=_-", )), Format::Z85 => Box::from(Z85Wrapper {}), - }; - - if decode { - fast_decode::fast_decode( - input, - supports_fast_decode_and_encode.as_ref(), - ignore_garbage, - )?; - } else { - fast_encode::fast_encode(input, supports_fast_decode_and_encode.as_ref(), wrap)?; } - - Ok(()) } -mod fast_encode { +pub mod fast_encode { use crate::base_common::{format_read_error, WRAP_DEFAULT}; use std::{ collections::VecDeque, - io::{self, ErrorKind, Read, StdoutLock, Write}, + io::{self, ErrorKind, Read, Write}, num::NonZeroUsize, }; use uucore::{ @@ -299,17 +309,17 @@ mod fast_encode { fn write_without_line_breaks( encoded_buffer: &mut VecDeque, - stdout_lock: &mut StdoutLock, + output: &mut dyn Write, is_cleanup: bool, ) -> io::Result<()> { // TODO // `encoded_buffer` only has to be a VecDeque if line wrapping is enabled // (`make_contiguous` should be a no-op here) // Refactoring could avoid this call - stdout_lock.write_all(encoded_buffer.make_contiguous())?; + output.write_all(encoded_buffer.make_contiguous())?; if is_cleanup { - stdout_lock.write_all(b"\n")?; + output.write_all(b"\n")?; } else { encoded_buffer.clear(); } @@ -323,7 +333,7 @@ mod fast_encode { ref mut print_buffer, }: &mut LineWrapping, encoded_buffer: &mut VecDeque, - stdout_lock: &mut StdoutLock, + output: &mut dyn Write, is_cleanup: bool, ) -> io::Result<()> { let line_length = line_length.get(); @@ -341,7 +351,7 @@ mod fast_encode { print_buffer.push(b'\n'); } - stdout_lock.write_all(print_buffer)?; + output.write_all(print_buffer)?; // Remove the bytes that were just printed from `encoded_buffer` drop(encoded_buffer.drain(..bytes_added_to_print_buffer)); @@ -351,8 +361,8 @@ mod fast_encode { // Do not write a newline in this case, because two trailing newlines should never be printed } else { // Print the partial line, since this is cleanup and no more data is coming - stdout_lock.write_all(encoded_buffer.make_contiguous())?; - stdout_lock.write_all(b"\n")?; + output.write_all(encoded_buffer.make_contiguous())?; + output.write_all(b"\n")?; } } else { print_buffer.clear(); @@ -361,27 +371,28 @@ mod fast_encode { Ok(()) } - fn write_to_stdout( + fn write_to_output( line_wrapping_option: &mut Option, encoded_buffer: &mut VecDeque, - stdout_lock: &mut StdoutLock, + output: &mut dyn Write, is_cleanup: bool, ) -> io::Result<()> { - // Write all data in `encoded_buffer` to stdout + // Write all data in `encoded_buffer` to `output` if let &mut Some(ref mut li) = line_wrapping_option { - write_with_line_breaks(li, encoded_buffer, stdout_lock, is_cleanup)?; + write_with_line_breaks(li, encoded_buffer, output, is_cleanup)?; } else { - write_without_line_breaks(encoded_buffer, stdout_lock, is_cleanup)?; + write_without_line_breaks(encoded_buffer, output, is_cleanup)?; } Ok(()) } // End of helper functions - pub fn fast_encode( + pub fn fast_encode( input: &mut R, + mut output: W, supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, - line_wrap: Option, + wrap: Option, ) -> UResult<()> { // Based on performance testing const INPUT_BUFFER_SIZE: usize = 32 * 1_024; @@ -393,10 +404,10 @@ mod fast_encode { assert!(encode_in_chunks_of_size > 0); - // "data-encoding" supports line wrapping, but not arbitrary line wrapping, only certain widths, so line - // wrapping must be implemented here. + // The "data-encoding" crate supports line wrapping, but not arbitrary line wrapping, only certain widths, so + // line wrapping must be handled here. // https://github.com/ia0/data-encoding/blob/4f42ad7ef242f6d243e4de90cd1b46a57690d00e/lib/src/lib.rs#L1710 - let mut line_wrapping_option = match line_wrap { + let mut line_wrapping = match wrap { // Line wrapping is disabled because "-w"/"--wrap" was passed with "0" Some(0) => None, // A custom line wrapping value was passed @@ -420,12 +431,10 @@ mod fast_encode { // Data that was read from stdin but has not been encoded yet let mut leftover_buffer = VecDeque::::new(); - // Encoded data that needs to be written to stdout + // Encoded data that needs to be written to output let mut encoded_buffer = VecDeque::::new(); // End of buffers - let mut stdout_lock = io::stdout().lock(); - loop { match input.read(&mut input_buffer) { Ok(bytes_read_from_input) => { @@ -443,6 +452,8 @@ mod fast_encode { // Do not have enough data to encode a chunk, so copy data to `leftover_buffer` and read more leftover_buffer.extend(read_buffer); + assert!(leftover_buffer.len() < encode_in_chunks_of_size); + continue; } @@ -456,13 +467,10 @@ mod fast_encode { &mut leftover_buffer, )?; - // Write all data in `encoded_buffer` to stdout - write_to_stdout( - &mut line_wrapping_option, - &mut encoded_buffer, - &mut stdout_lock, - false, - )?; + assert!(leftover_buffer.len() < encode_in_chunks_of_size); + + // Write all data in `encoded_buffer` to output + write_to_output(&mut line_wrapping, &mut encoded_buffer, &mut output, false)?; } Err(er) => { let kind = er.kind(); @@ -484,23 +492,18 @@ mod fast_encode { supports_fast_decode_and_encode .encode_to_vec_deque(leftover_buffer.make_contiguous(), &mut encoded_buffer)?; - // Write all data in `encoded_buffer` to stdout + // Write all data in `encoded_buffer` to output // `is_cleanup` triggers special cleanup-only logic - write_to_stdout( - &mut line_wrapping_option, - &mut encoded_buffer, - &mut stdout_lock, - true, - )?; + write_to_output(&mut line_wrapping, &mut encoded_buffer, &mut output, true)?; } Ok(()) } } -mod fast_decode { +pub mod fast_decode { use crate::base_common::format_read_error; - use std::io::{self, ErrorKind, Read, StdoutLock, Write}; + use std::io::{self, ErrorKind, Read, Write}; use uucore::{ encoding::SupportsFastDecodeAndEncode, error::{UResult, USimpleError}, @@ -588,12 +591,9 @@ mod fast_decode { Ok(()) } - fn write_to_stdout( - decoded_buffer: &mut Vec, - stdout_lock: &mut StdoutLock, - ) -> io::Result<()> { - // Write all data in `decoded_buffer` to stdout - stdout_lock.write_all(decoded_buffer.as_slice())?; + fn write_to_output(decoded_buffer: &mut Vec, output: &mut dyn Write) -> io::Result<()> { + // Write all data in `decoded_buffer` to `output` + output.write_all(decoded_buffer.as_slice())?; decoded_buffer.clear(); @@ -601,10 +601,9 @@ mod fast_decode { } // End of helper functions - /// `encoding`, `decode_in_chunks_of_size`, and `alphabet` are passed in a tuple to indicate that they are - /// logically tied - pub fn fast_decode( + pub fn fast_decode( input: &mut R, + mut output: &mut W, supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, ignore_garbage: bool, ) -> UResult<()> { @@ -640,7 +639,7 @@ mod fast_decode { // Data that was read from stdin but has not been decoded yet let mut leftover_buffer = Vec::::new(); - // Decoded data that needs to be written to stdout + // Decoded data that needs to be written to `output` let mut decoded_buffer = Vec::::new(); // Buffer that will be used when "ignore_garbage" is true, and the chunk read from "input" contains garbage @@ -648,8 +647,6 @@ mod fast_decode { let mut non_garbage_buffer = Vec::::new(); // End of buffers - let mut stdout_lock = io::stdout().lock(); - loop { match input.read(&mut input_buffer) { Ok(bytes_read_from_input) => { @@ -687,10 +684,12 @@ mod fast_decode { // How many bytes to steal from `read_buffer` to get `leftover_buffer` to the right size let bytes_to_steal = decode_in_chunks_of_size - leftover_buffer.len(); - if bytes_to_steal > bytes_read_from_input { + if bytes_to_steal > read_buffer_filtered.len() { // Do not have enough data to decode a chunk, so copy data to `leftover_buffer` and read more leftover_buffer.extend(read_buffer_filtered); + assert!(leftover_buffer.len() < decode_in_chunks_of_size); + continue; } @@ -700,12 +699,14 @@ mod fast_decode { decode_in_chunks_of_size, bytes_to_steal, read_buffer_filtered, - &mut leftover_buffer, &mut decoded_buffer, + &mut leftover_buffer, )?; - // Write all data in `decoded_buffer` to stdout - write_to_stdout(&mut decoded_buffer, &mut stdout_lock)?; + assert!(leftover_buffer.len() < decode_in_chunks_of_size); + + // Write all data in `decoded_buffer` to `output` + write_to_output(&mut decoded_buffer, &mut output)?; } Err(er) => { let kind = er.kind(); @@ -727,8 +728,8 @@ mod fast_decode { supports_fast_decode_and_encode .decode_into_vec(&leftover_buffer, &mut decoded_buffer)?; - // Write all data in `decoded_buffer` to stdout - write_to_stdout(&mut decoded_buffer, &mut stdout_lock)?; + // Write all data in `decoded_buffer` to `output` + write_to_output(&mut decoded_buffer, &mut output)?; } Ok(()) @@ -739,7 +740,7 @@ fn format_read_error(kind: ErrorKind) -> String { let kind_string = kind.to_string(); // e.g. "is a directory" -> "Is a directory" - let kind_string_uncapitalized = kind_string + let kind_string_capitalized = kind_string .char_indices() .map(|(index, character)| { if index == 0 { @@ -750,5 +751,5 @@ fn format_read_error(kind: ErrorKind) -> String { }) .collect::(); - format!("read error: {kind_string_uncapitalized}") + format!("read error: {kind_string_capitalized}") } diff --git a/src/uu/base32/tests/property_tests.rs b/src/uu/base32/tests/property_tests.rs new file mode 100644 index 00000000000..0f2393c42ab --- /dev/null +++ b/src/uu/base32/tests/property_tests.rs @@ -0,0 +1,430 @@ +// spell-checker:ignore lsbf msbf proptest + +use proptest::{prelude::TestCaseError, prop_assert, prop_assert_eq, test_runner::TestRunner}; +use std::io::Cursor; +use uu_base32::base_common::{fast_decode, fast_encode, get_supports_fast_decode_and_encode}; +use uucore::encoding::{Format, SupportsFastDecodeAndEncode}; + +const CASES: u32 = { + #[cfg(debug_assertions)] + { + 32 + } + + #[cfg(not(debug_assertions))] + { + 128 + } +}; + +const NORMAL_INPUT_SIZE_LIMIT: usize = { + #[cfg(debug_assertions)] + { + // 256 kibibytes + 256 * 1024 + } + + #[cfg(not(debug_assertions))] + { + // 4 mebibytes + 4 * 1024 * 1024 + } +}; + +const LARGE_INPUT_SIZE_LIMIT: usize = 4 * NORMAL_INPUT_SIZE_LIMIT; + +// Note that `TestRunner`s cannot be reused +fn get_test_runner() -> TestRunner { + TestRunner::new(proptest::test_runner::Config { + cases: CASES, + failure_persistence: None, + + ..proptest::test_runner::Config::default() + }) +} + +fn generic_round_trip(format: Format) { + let supports_fast_decode_and_encode = get_supports_fast_decode_and_encode(format); + + let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode.as_ref(); + + // Make sure empty inputs round trip + { + get_test_runner() + .run( + &( + proptest::bool::ANY, + proptest::bool::ANY, + proptest::option::of(0_usize..512_usize), + ), + |(ignore_garbage, line_wrap_zero, line_wrap)| { + configurable_round_trip( + format, + supports_fast_decode_and_encode_ref, + ignore_garbage, + line_wrap_zero, + line_wrap, + // Do not add garbage + Vec::<(usize, u8)>::new(), + // Empty input + Vec::::new(), + ) + }, + ) + .unwrap(); + } + + // Unusually large line wrapping settings + { + get_test_runner() + .run( + &( + proptest::bool::ANY, + proptest::bool::ANY, + proptest::option::of(512_usize..65_535_usize), + proptest::collection::vec(proptest::num::u8::ANY, 0..NORMAL_INPUT_SIZE_LIMIT), + ), + |(ignore_garbage, line_wrap_zero, line_wrap, input)| { + configurable_round_trip( + format, + supports_fast_decode_and_encode_ref, + ignore_garbage, + line_wrap_zero, + line_wrap, + // Do not add garbage + Vec::<(usize, u8)>::new(), + input, + ) + }, + ) + .unwrap(); + } + + // Spend more time on sane line wrapping settings + { + get_test_runner() + .run( + &( + proptest::bool::ANY, + proptest::bool::ANY, + proptest::option::of(0_usize..512_usize), + proptest::collection::vec(proptest::num::u8::ANY, 0..NORMAL_INPUT_SIZE_LIMIT), + ), + |(ignore_garbage, line_wrap_zero, line_wrap, input)| { + configurable_round_trip( + format, + supports_fast_decode_and_encode_ref, + ignore_garbage, + line_wrap_zero, + line_wrap, + // Do not add garbage + Vec::<(usize, u8)>::new(), + input, + ) + }, + ) + .unwrap(); + } + + // Test with garbage data + { + get_test_runner() + .run( + &( + proptest::bool::ANY, + proptest::bool::ANY, + proptest::option::of(0_usize..512_usize), + // Garbage data to insert + proptest::collection::vec( + ( + // Random index + proptest::num::usize::ANY, + // In all of the encodings being tested, non-ASCII bytes are garbage + 128_u8..=u8::MAX, + ), + 0..4_096, + ), + proptest::collection::vec(proptest::num::u8::ANY, 0..NORMAL_INPUT_SIZE_LIMIT), + ), + |(ignore_garbage, line_wrap_zero, line_wrap, garbage_data, input)| { + configurable_round_trip( + format, + supports_fast_decode_and_encode_ref, + ignore_garbage, + line_wrap_zero, + line_wrap, + garbage_data, + input, + ) + }, + ) + .unwrap(); + } + + // Test small inputs + { + get_test_runner() + .run( + &( + proptest::bool::ANY, + proptest::bool::ANY, + proptest::option::of(0_usize..512_usize), + proptest::collection::vec(proptest::num::u8::ANY, 0..1_024), + ), + |(ignore_garbage, line_wrap_zero, line_wrap, input)| { + configurable_round_trip( + format, + supports_fast_decode_and_encode_ref, + ignore_garbage, + line_wrap_zero, + line_wrap, + // Do not add garbage + Vec::<(usize, u8)>::new(), + input, + ) + }, + ) + .unwrap(); + } + + // Test small inputs with garbage data + { + get_test_runner() + .run( + &( + proptest::bool::ANY, + proptest::bool::ANY, + proptest::option::of(0_usize..512_usize), + // Garbage data to insert + proptest::collection::vec( + ( + // Random index + proptest::num::usize::ANY, + // In all of the encodings being tested, non-ASCII bytes are garbage + 128_u8..=u8::MAX, + ), + 0..1_024, + ), + proptest::collection::vec(proptest::num::u8::ANY, 0..1_024), + ), + |(ignore_garbage, line_wrap_zero, line_wrap, garbage_data, input)| { + configurable_round_trip( + format, + supports_fast_decode_and_encode_ref, + ignore_garbage, + line_wrap_zero, + line_wrap, + garbage_data, + input, + ) + }, + ) + .unwrap(); + } + + // Test large inputs + { + get_test_runner() + .run( + &( + proptest::bool::ANY, + proptest::bool::ANY, + proptest::option::of(0_usize..512_usize), + proptest::collection::vec(proptest::num::u8::ANY, 0..LARGE_INPUT_SIZE_LIMIT), + ), + |(ignore_garbage, line_wrap_zero, line_wrap, input)| { + configurable_round_trip( + format, + supports_fast_decode_and_encode_ref, + ignore_garbage, + line_wrap_zero, + line_wrap, + // Do not add garbage + Vec::<(usize, u8)>::new(), + input, + ) + }, + ) + .unwrap(); + } +} + +fn configurable_round_trip( + format: Format, + supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, + ignore_garbage: bool, + line_wrap_zero: bool, + line_wrap: Option, + garbage_data: Vec<(usize, u8)>, + mut input: Vec, +) -> Result<(), TestCaseError> { + // Z85 only accepts inputs with lengths divisible by 4 + if let Format::Z85 = format { + // Reduce length of "input" until it is divisible by 4 + input.truncate((input.len() / 4) * 4); + + assert!((input.len() % 4) == 0); + } + + let line_wrap_to_use = if line_wrap_zero { Some(0) } else { line_wrap }; + + let input_len = input.len(); + + let garbage_data_len = garbage_data.len(); + + let garbage_data_is_empty = garbage_data_len == 0; + + let (input, encoded) = { + let mut output = Vec::with_capacity(input_len * 8); + + let mut cursor = Cursor::new(input); + + fast_encode::fast_encode( + &mut cursor, + &mut output, + supports_fast_decode_and_encode, + line_wrap_to_use, + ) + .unwrap(); + + (cursor.into_inner(), output) + }; + + let encoded_or_encoded_with_garbage = if garbage_data_is_empty { + encoded + } else { + let encoded_len = encoded.len(); + + let encoded_highest_index = match encoded_len.checked_sub(1) { + Some(0) | None => None, + Some(x) => Some(x), + }; + + let mut garbage_data_indexed = vec![Option::::None; encoded_len]; + + let mut encoded_with_garbage = Vec::::with_capacity(encoded_len + garbage_data_len); + + for (index, garbage_byte) in garbage_data { + if let Some(x) = encoded_highest_index { + let index_to_use = index % x; + + garbage_data_indexed[index_to_use] = Some(garbage_byte); + } else { + encoded_with_garbage.push(garbage_byte); + } + } + + for (index, encoded_byte) in encoded.into_iter().enumerate() { + encoded_with_garbage.push(encoded_byte); + + if let Some(garbage_byte) = garbage_data_indexed[index] { + encoded_with_garbage.push(garbage_byte); + } + } + + encoded_with_garbage + }; + + match line_wrap_to_use { + Some(0) => { + let line_endings_count = encoded_or_encoded_with_garbage + .iter() + .filter(|byte| **byte == b'\n') + .count(); + + // If line wrapping is disabled, there should only be one '\n' character (at the very end of the output) + prop_assert_eq!(line_endings_count, 1); + } + _ => { + // TODO + // Validate other line wrapping settings + } + } + + let decoded_or_error = { + let mut output = Vec::with_capacity(input_len); + + let mut cursor = Cursor::new(encoded_or_encoded_with_garbage); + + match fast_decode::fast_decode( + &mut cursor, + &mut output, + supports_fast_decode_and_encode, + ignore_garbage, + ) { + Ok(()) => Ok(output), + Err(er) => Err(er), + } + }; + + let made_round_trip = match decoded_or_error { + Ok(ve) => input.as_slice() == ve.as_slice(), + Err(_) => false, + }; + + let result_was_correct = if garbage_data_is_empty || ignore_garbage { + // If there was no garbage data added, or if "ignore_garbage" was enabled, expect the round trip to succeed + made_round_trip + } else { + // If garbage data was added, and "ignore_garbage" was disabled, expect the round trip to fail + + !made_round_trip + }; + + if !result_was_correct { + eprintln!( + "\ +(configurable_round_trip) FAILURE +format: {format:?} +ignore_garbage: {ignore_garbage} +line_wrap_to_use: {line_wrap_to_use:?} +garbage_data_len: {garbage_data_len} +input_len: {input_len} +", + ); + } + + prop_assert!(result_was_correct); + + Ok(()) +} + +#[test] +fn base16_round_trip() { + generic_round_trip(Format::Base16); +} + +#[test] +fn base2lsbf_round_trip() { + generic_round_trip(Format::Base2Lsbf); +} + +#[test] +fn base2msbf_round_trip() { + generic_round_trip(Format::Base2Msbf); +} + +#[test] +fn base32_round_trip() { + generic_round_trip(Format::Base32); +} + +#[test] +fn base32hex_round_trip() { + generic_round_trip(Format::Base32Hex); +} + +#[test] +fn base64_round_trip() { + generic_round_trip(Format::Base64); +} + +#[test] +fn base64url_round_trip() { + generic_round_trip(Format::Base64Url); +} + +#[test] +fn z85_round_trip() { + generic_round_trip(Format::Z85); +} diff --git a/src/uu/factor/Cargo.toml b/src/uu/factor/Cargo.toml index 49e836befa3..e28db8e6377 100644 --- a/src/uu/factor/Cargo.toml +++ b/src/uu/factor/Cargo.toml @@ -26,9 +26,6 @@ uucore = { workspace = true } num-bigint = { workspace = true } num-prime = { workspace = true } -[dev-dependencies] -quickcheck = "1.0.3" - [[bin]] name = "factor" path = "src/main.rs" diff --git a/src/uucore/src/lib/features/encoding.rs b/src/uucore/src/lib/features/encoding.rs index 0d2ce622297..b213da00761 100644 --- a/src/uucore/src/lib/features/encoding.rs +++ b/src/uucore/src/lib/features/encoding.rs @@ -19,7 +19,7 @@ pub mod for_cksum { pub use data_encoding::BASE64; } -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub enum Format { Base64, Base64Url, @@ -57,6 +57,12 @@ impl EncodingWrapper { unpadded_multiple: usize, alphabet: &'static [u8], ) -> Self { + assert!(valid_decoding_multiple > 0); + + assert!(unpadded_multiple > 0); + + assert!(!alphabet.is_empty()); + Self { alphabet, encoding, @@ -110,15 +116,6 @@ impl SupportsFastDecodeAndEncode for Z85Wrapper { return Err(USimpleError::new(1, "error: invalid input".to_owned())); } - // According to the spec we should not accept inputs whose len is not a multiple of 4. - // However, the z85 crate implements a padded encoding and accepts such inputs. We have to manually check for them. - if input.len() % 4 != 0 { - return Err(USimpleError::new( - 1, - "error: invalid input (length must be multiple of 4 characters)".to_owned(), - )); - }; - let decode_result = match z85::decode(input) { Ok(ve) => ve, Err(_de) => { diff --git a/tests/by-util/test_basenc.rs b/tests/by-util/test_basenc.rs index ce7e864a301..437413a26b7 100644 --- a/tests/by-util/test_basenc.rs +++ b/tests/by-util/test_basenc.rs @@ -45,7 +45,6 @@ fn test_base64() { .arg("--base64") .pipe_in("to>be?") .succeeds() - .no_stderr() .stdout_only("dG8+YmU/\n"); } @@ -55,7 +54,6 @@ fn test_base64_decode() { .args(&["--base64", "-d"]) .pipe_in("dG8+YmU/") .succeeds() - .no_stderr() .stdout_only("to>be?"); } @@ -65,7 +63,6 @@ fn test_base64url() { .arg("--base64url") .pipe_in("to>be?") .succeeds() - .no_stderr() .stdout_only("dG8-YmU_\n"); } @@ -75,7 +72,6 @@ fn test_base64url_decode() { .args(&["--base64url", "-d"]) .pipe_in("dG8-YmU_") .succeeds() - .no_stderr() .stdout_only("to>be?"); } @@ -85,7 +81,6 @@ fn test_base32() { .arg("--base32") .pipe_in("nice>base?") .succeeds() - .no_stderr() .stdout_only("NZUWGZJ6MJQXGZJ7\n"); // spell-checker:disable-line } @@ -95,7 +90,6 @@ fn test_base32_decode() { .args(&["--base32", "-d"]) .pipe_in("NZUWGZJ6MJQXGZJ7") // spell-checker:disable-line .succeeds() - .no_stderr() .stdout_only("nice>base?"); } @@ -105,7 +99,6 @@ fn test_base32hex() { .arg("--base32hex") .pipe_in("nice>base?") .succeeds() - .no_stderr() .stdout_only("DPKM6P9UC9GN6P9V\n"); // spell-checker:disable-line } @@ -115,7 +108,6 @@ fn test_base32hex_decode() { .args(&["--base32hex", "-d"]) .pipe_in("DPKM6P9UC9GN6P9V") // spell-checker:disable-line .succeeds() - .no_stderr() .stdout_only("nice>base?"); } @@ -125,7 +117,6 @@ fn test_base16() { .arg("--base16") .pipe_in("Hello, World!") .succeeds() - .no_stderr() .stdout_only("48656C6C6F2C20576F726C6421\n"); } @@ -135,7 +126,6 @@ fn test_base16_decode() { .args(&["--base16", "-d"]) .pipe_in("48656C6C6F2C20576F726C6421") .succeeds() - .no_stderr() .stdout_only("Hello, World!"); } @@ -145,7 +135,6 @@ fn test_base2msbf() { .arg("--base2msbf") .pipe_in("msbf") .succeeds() - .no_stderr() .stdout_only("01101101011100110110001001100110\n"); } @@ -155,7 +144,6 @@ fn test_base2msbf_decode() { .args(&["--base2msbf", "-d"]) .pipe_in("01101101011100110110001001100110") .succeeds() - .no_stderr() .stdout_only("msbf"); } @@ -165,7 +153,6 @@ fn test_base2lsbf() { .arg("--base2lsbf") .pipe_in("lsbf") .succeeds() - .no_stderr() .stdout_only("00110110110011100100011001100110\n"); } @@ -175,7 +162,6 @@ fn test_base2lsbf_decode() { .args(&["--base2lsbf", "-d"]) .pipe_in("00110110110011100100011001100110") .succeeds() - .no_stderr() .stdout_only("lsbf"); } @@ -194,7 +180,6 @@ fn test_choose_last_encoding_z85() { ]) .pipe_in("Hello, World") .succeeds() - .no_stderr() .stdout_only("nm=QNz.92jz/PV8\n"); } @@ -213,7 +198,6 @@ fn test_choose_last_encoding_base64() { ]) .pipe_in("Hello, World!") .succeeds() - .no_stderr() .stdout_only("SGVsbG8sIFdvcmxkIQ==\n"); // spell-checker:disable-line } @@ -232,7 +216,6 @@ fn test_choose_last_encoding_base2lsbf() { ]) .pipe_in("lsbf") .succeeds() - .no_stderr() .stdout_only("00110110110011100100011001100110\n"); } @@ -253,6 +236,18 @@ fn test_base32_decode_repeated() { ]) .pipe_in("NZUWGZJ6MJQXGZJ7") // spell-checker:disable-line .succeeds() - .no_stderr() .stdout_only("nice>base?"); } + +// The restriction that input length has to be divisible by 4 only applies to data being encoded with Z85, not to the +// decoding of Z85-encoded data +#[test] +fn test_z85_length_check() { + new_ucmd!() + .args(&["--decode", "--z85"]) + // Input has length 10, not divisible by 4 + // spell-checker:disable-next-line + .pipe_in("f!$Kwh8WxM") + .succeeds() + .stdout_only("12345678"); +}