diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index fc01a83608d..622788d4d08 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -16,6 +16,7 @@ use nom::{ IResult, }; use std::{ + char, collections::{HashMap, HashSet}, error::Error, fmt::{Debug, Display}, @@ -23,6 +24,7 @@ use std::{ ops::Not, }; use uucore::error::UError; +use uucore::show_warning; #[derive(Debug, Clone)] pub enum BadSequence { @@ -293,7 +295,9 @@ impl Sequence { Self::parse_class, Self::parse_char_equal, // NOTE: This must be the last one - map(Self::parse_backslash_or_char, |s| Ok(Self::Char(s))), + map(Self::parse_backslash_or_char_with_warning, |s| { + Ok(Self::Char(s)) + }), )))(input) .map(|(_, r)| r) .unwrap() @@ -302,10 +306,16 @@ impl Sequence { } fn parse_octal(input: &[u8]) -> IResult<&[u8], u8> { + // For `parse_char_range`, `parse_char_star`, `parse_char_repeat`, `parse_char_equal`. + // Because in these patterns, there's no ambiguous cases. + preceded(tag("\\"), Self::parse_octal_up_to_three_digits)(input) + } + + fn parse_octal_with_warning(input: &[u8]) -> IResult<&[u8], u8> { preceded( tag("\\"), alt(( - Self::parse_octal_up_to_three_digits, + Self::parse_octal_up_to_three_digits_with_warning, // Fallback for if the three digit octal escape is greater than \377 (0xFF), and therefore can't be // parsed as as a byte // See test `test_multibyte_octal_sequence` @@ -319,13 +329,31 @@ impl Sequence { recognize(many_m_n(1, 3, one_of("01234567"))), |out: &[u8]| { let str_to_parse = std::str::from_utf8(out).unwrap(); + match u8::from_str_radix(str_to_parse, 8) { + Ok(ue) => Some(ue), + Err(_pa) => None, + } + }, + )(input) + } + fn parse_octal_up_to_three_digits_with_warning(input: &[u8]) -> IResult<&[u8], u8> { + map_opt( + recognize(many_m_n(1, 3, one_of("01234567"))), + |out: &[u8]| { + let str_to_parse = std::str::from_utf8(out).unwrap(); match u8::from_str_radix(str_to_parse, 8) { Ok(ue) => Some(ue), Err(_pa) => { - // TODO - // A warning needs to be printed here - // See https://github.com/uutils/coreutils/issues/6821 + let origin_octal: &str = std::str::from_utf8(input).unwrap(); + let actual_octal_tail: &str = std::str::from_utf8(&input[0..2]).unwrap(); + let outstand_char: char = char::from_u32(input[2] as u32).unwrap(); + show_warning!( + "the ambiguous octal escape \\{} is being\n interpreted as the 2-byte sequence \\0{}, {}", + origin_octal, + actual_octal_tail, + outstand_char + ); None } } @@ -360,6 +388,14 @@ impl Sequence { alt((Self::parse_octal, Self::parse_backslash, Self::single_char))(input) } + fn parse_backslash_or_char_with_warning(input: &[u8]) -> IResult<&[u8], u8> { + alt(( + Self::parse_octal_with_warning, + Self::parse_backslash, + Self::single_char, + ))(input) + } + fn single_char(input: &[u8]) -> IResult<&[u8], u8> { take(1usize)(input).map(|(l, a)| (l, a[0])) } diff --git a/tests/by-util/test_tr.rs b/tests/by-util/test_tr.rs index ebd7635e432..705f40834e5 100644 --- a/tests/by-util/test_tr.rs +++ b/tests/by-util/test_tr.rs @@ -1494,9 +1494,7 @@ fn test_multibyte_octal_sequence() { .args(&["-d", r"\501"]) .pipe_in("(1Ł)") .succeeds() - // TODO - // A warning needs to be printed here - // See https://github.com/uutils/coreutils/issues/6821 + .stderr_is("tr: warning: the ambiguous octal escape \\501 is being\n interpreted as the 2-byte sequence \\050, 1\n") .stdout_is("Ł)"); }