Skip to content

Commit

Permalink
Adds --hex option to inspect (#168)
Browse files Browse the repository at this point in the history
  • Loading branch information
popematt authored Oct 31, 2024
1 parent 3a5d9c5 commit 1953628
Show file tree
Hide file tree
Showing 5 changed files with 279 additions and 33 deletions.
110 changes: 78 additions & 32 deletions src/bin/ion/commands/inspect.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use std::fmt::Display;
use std::io::Write;
use std::io::{Cursor, ErrorKind, Read, Write};

Check warning on line 2 in src/bin/ion/commands/inspect.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

unused import: `ErrorKind`

Check warning on line 2 in src/bin/ion/commands/inspect.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

unused import: `Read`

Check warning on line 2 in src/bin/ion/commands/inspect.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

unused import: `ErrorKind`

Check warning on line 2 in src/bin/ion/commands/inspect.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

unused import: `Read`

Check warning on line 2 in src/bin/ion/commands/inspect.rs

View workflow job for this annotation

GitHub Actions / Build and Test (macos-latest)

unused import: `ErrorKind`

Check warning on line 2 in src/bin/ion/commands/inspect.rs

View workflow job for this annotation

GitHub Actions / Build and Test (macos-latest)

unused import: `Read`

Check warning on line 2 in src/bin/ion/commands/inspect.rs

View workflow job for this annotation

GitHub Actions / Build and Test (macos-latest)

unused import: `ErrorKind`

Check warning on line 2 in src/bin/ion/commands/inspect.rs

View workflow job for this annotation

GitHub Actions / Build and Test (macos-latest)

unused import: `Read`

Check warning on line 2 in src/bin/ion/commands/inspect.rs

View workflow job for this annotation

GitHub Actions / Build and Test (windows-latest)

unused import: `ErrorKind`

Check warning on line 2 in src/bin/ion/commands/inspect.rs

View workflow job for this annotation

GitHub Actions / Build and Test (windows-latest)

unused import: `Read`

Check warning on line 2 in src/bin/ion/commands/inspect.rs

View workflow job for this annotation

GitHub Actions / Build and Test (windows-latest)

unused import: `ErrorKind`

Check warning on line 2 in src/bin/ion/commands/inspect.rs

View workflow job for this annotation

GitHub Actions / Build and Test (windows-latest)

unused import: `Read`
use std::str::FromStr;

use crate::commands::{CommandIo, IonCliCommand, WithIonCliArgument};
Expand All @@ -10,6 +10,7 @@ use ion_rs::v1_0::{EncodedBinaryValue, RawValueRef};
use ion_rs::*;

// The `inspect` command uses the `termcolor` crate to colorize its text when STDOUT is a TTY.
use crate::hex_reader::HexReader;
use termcolor::{Color, ColorSpec, WriteColor};
// When writing to a named file instead of STDOUT, `inspect` will use a `FileWriter` instead.
// `FileWriter` ignores all requests to emit TTY color escape codes.
Expand All @@ -31,10 +32,9 @@ impl IonCliCommand for InspectCommand {
}

fn about(&self) -> &'static str {
"Displays hex-encoded binary Ion alongside its equivalent text Ion.
Its output prioritizes human readability and is likely to change
between versions. Stable output for programmatic use cases is a
non-goal."
"Displays hex-encoded binary Ion alongside its equivalent text Ion. \
Its output prioritizes human readability and is likely to change \
between versions. Stable output for programmatic use cases is a non-goal."
}

fn configure_args(&self, command: Command) -> Command {
Expand All @@ -50,12 +50,12 @@ non-goal."
.hide_default_value(true)
.help("Do not display any user values for the first `n` bytes of Ion data.")
.long_help(
"When specified, the inspector will skip ahead `n` bytes before
beginning to display the contents of the stream. If the requested number
of bytes falls in the middle of a scalar, the whole value (complete with
field ID and annotations if applicable) will be displayed. If the value
is nested in one or more containers, the opening delimiters of those
containers be displayed.",
"When specified, the inspector will skip ahead `n` bytes before beginning \
to display the contents of the stream. If the requested number of bytes \
falls in the middle of a scalar, the whole value (complete with field ID \
and annotations if applicable) will be displayed. If the value is nested \
in one or more containers, the opening delimiters of those containers be \
displayed.",
),
)
.arg(
Expand All @@ -67,13 +67,12 @@ containers be displayed.",
.hide_default_value(true)
.help("Only display the next 'n' bytes of Ion data.")
.long_help(
"When specified, the inspector will stop printing values after
processing `n` bytes of Ion data. If `n` falls within a scalar, the
complete value will be displayed. If `n` falls within one or more containers,
the closing delimiters for those containers will be displayed. If this flag
is used with `--skip-bytes`, `n` is counted from the beginning of the first
value start after `--skip-bytes`.
",
"When specified, the inspector will stop printing values after \
processing `n` bytes of Ion data. If `n` falls within a scalar, the \
complete value will be displayed. If `n` falls within one or more \
containers, the closing delimiters for those containers will be displayed. \
If this flag is used with `--skip-bytes`, `n` is counted from the beginning \
of the first value start after `--skip-bytes`.",
),
)
.arg(
Expand All @@ -84,13 +83,27 @@ value start after `--skip-bytes`.
.value_parser(ValueParser::bool())
.help("Do not show values produced by macro evaluation.")
.long_help(
"When specified, the inspector will display e-expressions
(that is: data stream macro invocations) but will not show values produced
by evaluating those e-expressions. If an e-expression produces a 'system'
value that modifies the encoding context (that is: a symbol table or
encoding directive), that value will still be displayed.",
"When specified, the inspector will display e-expressions (that is: \
data stream macro invocations) but will not show values produced by \
evaluating those e-expressions. If an e-expression produces a 'system' \
value that modifies the encoding context (that is: a symbol table or \
encoding directive), that value will still be displayed.",
),
)
.arg(
Arg::new("hex-input")
.long("hex")
.num_args(0..=1)
.action(ArgAction::Append)
.require_equals(true)
.help("Specify that the input Ion binary is encoded as hexadecimal pairs.")
.long_help(
"When specified, the inspector will convert the input from hexadecimal \
digits to Ion binary. The input may be STDIN, one or more files, or it may \
be provided inline using '='. If the hex input is provided inline, all \
other inputs will be ignored.",
)
)
}

fn run(&self, _command_path: &mut Vec<String>, args: &ArgMatches) -> Result<()> {
Expand Down Expand Up @@ -126,17 +139,50 @@ value start after `--skip-bytes`.

let hide_expansion = args.get_flag("hide-expansion");

CommandIo::new(args).for_each_input(|output, input| {
let mut command_io = CommandIo::new(args);

let mut read_as_hex_string = false;
if let Some(hex_args) = args.get_many::<String>("hex-input") {
read_as_hex_string = true;

if hex_args.len() > 0 {
let mut byte_string = String::new();
hex_args.into_iter().for_each(|s| byte_string.push_str(s));
return command_io.write_output(|output| {
inspect_input(
&byte_string,
IonStream::new(HexReader::from(Cursor::new(byte_string.clone()))),
output,
bytes_to_skip,
limit_bytes,
hide_expansion,
)
});
}
}

command_io.for_each_input(|output, input| {
let input_name = input.name().to_owned();
let input = input.into_source();
inspect_input(
&input_name,
input,
output,
bytes_to_skip,
limit_bytes,
hide_expansion,
)
if read_as_hex_string {
inspect_input(
&input_name,
HexReader::from(input),
output,
bytes_to_skip,
limit_bytes,
hide_expansion,
)
} else {
inspect_input(
&input_name,
input,
output,
bytes_to_skip,
limit_bytes,
hide_expansion,
)
}
})
}
}
Expand Down
26 changes: 26 additions & 0 deletions src/bin/ion/commands/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -289,4 +289,30 @@ impl<'a> CommandIo<'a> {
output.flush()?;
Ok(())
}

fn write_output(&self, mut f: impl FnMut(&mut CommandOutput) -> Result<()>) -> Result<()> {
// These types are provided by the `termcolor` crate. They wrap the normal `io::Stdout` and
// `io::StdOutLock` types, making it possible to write colorful text to the output stream when
// it's a TTY that understands formatting escape codes. These variables are declared here so
// the lifetime will extend through the remainder of the function. Unlike `io::StdoutLock`,
// the `StandardStreamLock` does not have a static lifetime.
let stdout: StandardStream;
let stdout_lock: StandardStreamLock;
let mut output = if let Some(output_file) = self.args.get_one::<String>("output") {
// If the user has specified an output file, use it.
let file = File::create(output_file).with_context(|| {
format!(
"could not open file output file '{}' for writing",
output_file
)
})?;
CommandOutput::File(FileWriter::new(file))
} else {
// Otherwise, write to STDOUT.
stdout = StandardStream::stdout(ColorChoice::Always);
stdout_lock = stdout.lock();
CommandOutput::StdOut(stdout_lock)
};
f(&mut output)
}
}
2 changes: 1 addition & 1 deletion src/bin/ion/commands/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ impl IonCliCommand for PrimitiveCommand {
fn integer_from_text(text: &str) -> Result<i64> {
if text.starts_with("0x") {
i64::from_str_radix(text, 16)
.with_context(|| format!("{} is not a valid hexidecimal integer value.", text))
.with_context(|| format!("{} is not a valid hexadecimal integer value.", text))
} else if text.starts_with("0b") {
i64::from_str_radix(text, 2)
.with_context(|| format!("{} is not a valid binary integer value.", text))
Expand Down
173 changes: 173 additions & 0 deletions src/bin/ion/hex_reader.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
use crate::hex_reader::DigitState::ZeroX;

Check warning on line 1 in src/bin/ion/hex_reader.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

unused import: `crate::hex_reader::DigitState::ZeroX`

Check warning on line 1 in src/bin/ion/hex_reader.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

unused import: `crate::hex_reader::DigitState::ZeroX`

Check warning on line 1 in src/bin/ion/hex_reader.rs

View workflow job for this annotation

GitHub Actions / Build and Test (macos-latest)

unused import: `crate::hex_reader::DigitState::ZeroX`

Check warning on line 1 in src/bin/ion/hex_reader.rs

View workflow job for this annotation

GitHub Actions / Build and Test (macos-latest)

unused import: `crate::hex_reader::DigitState::ZeroX`

Check warning on line 1 in src/bin/ion/hex_reader.rs

View workflow job for this annotation

GitHub Actions / Build and Test (windows-latest)

unused import: `crate::hex_reader::DigitState::ZeroX`

Check warning on line 1 in src/bin/ion/hex_reader.rs

View workflow job for this annotation

GitHub Actions / Build and Test (windows-latest)

unused import: `crate::hex_reader::DigitState::ZeroX`
use ion_rs::{IonInput, IonStream};
use std::io::{Bytes, Cursor, Error, ErrorKind, Read};

Check warning on line 3 in src/bin/ion/hex_reader.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

unused import: `Cursor`

Check warning on line 3 in src/bin/ion/hex_reader.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

unused import: `Cursor`

Check warning on line 3 in src/bin/ion/hex_reader.rs

View workflow job for this annotation

GitHub Actions / Build and Test (macos-latest)

unused import: `Cursor`

Check warning on line 3 in src/bin/ion/hex_reader.rs

View workflow job for this annotation

GitHub Actions / Build and Test (macos-latest)

unused import: `Cursor`

Check warning on line 3 in src/bin/ion/hex_reader.rs

View workflow job for this annotation

GitHub Actions / Build and Test (windows-latest)

unused import: `Cursor`

Check warning on line 3 in src/bin/ion/hex_reader.rs

View workflow job for this annotation

GitHub Actions / Build and Test (windows-latest)

unused import: `Cursor`

/// Adapter that presents the hexadecimal text produced by another reader as the bytes that
/// text encodes.
///
/// Each byte is written as a pair of hex digits, either bare (`HH`) or with a `0x` prefix
/// (`0xHH`). The digits `H` may be upper or lower case; the `x` of the prefix must be lower
/// case. Any amount of whitespace and any number of commas may appear between bytes, but not
/// inside one. Nothing else is accepted.
///
/// When `read` meets a character that is not allowed at that position, or the input ends
/// partway through a byte, it returns `Err`.
pub struct HexReader<R>
where
    R: Read,
{
    /// Byte-by-byte iterator over the wrapped reader.
    inner: Bytes<R>,
    /// How much of the current hex-encoded byte has been seen so far.
    digit_state: DigitState,
}

/// Progress of the reader through a single hex-encoded byte.
#[derive(Debug, PartialEq, Eq)]
enum DigitState {
    /// No part of a byte has been seen yet; separators are permitted here.
    Empty,
    /// A single `0` has been seen. It is not yet known whether that `0` is the upper nibble of
    /// a bare pair (`0H`) or the beginning of a `0x` prefix (`0xHH`); the next character
    /// resolves the ambiguity.
    Zero,
    /// The `0x` prefix has been seen. A hex digit must follow, and it becomes the upper nibble
    /// of the byte.
    ZeroX,
    /// The upper nibble (the contained character) has been seen, via either `H` or `0xH`.
    /// A hex digit must follow, and it completes the byte.
    HasUpperNibble(char),
}

/// Implements `ion_rs`'s `IonInput` for `HexReader`, with `IonStream<Self>` as the data source.
impl<R: Read> IonInput for HexReader<R> {
type DataSource = IonStream<Self>;

/// Consumes the reader and wraps it in an `IonStream`.
fn into_data_source(self) -> Self::DataSource {
IonStream::new(self)
}
}

impl<R: Read> From<R> for HexReader<R> {
fn from(value: R) -> Self {
Self {
inner: value.bytes(),
digit_state: DigitState::Empty,
}
}
}

impl<R: Read> Read for HexReader<R> {
    /// Decodes hex-encoded bytes from the wrapped reader into `buf`.
    ///
    /// Characters are pulled from the inner reader one at a time until either `buf` is full or
    /// the inner reader is exhausted. Returns the number of decoded bytes written to `buf`;
    /// `Ok(0)` means that `buf` was empty or that the input has ended.
    ///
    /// # Errors
    ///
    /// Returns an `ErrorKind::InvalidData` error when a character is not permitted at its
    /// position (including whitespace or a comma in the middle of a byte) or when the input ends
    /// partway through a byte. In the latter case, bytes that were already decoded during the
    /// same call are returned first and the error is reported by the following call (this
    /// assumes the inner reader keeps reporting end-of-input once it has been reached).
    /// Errors produced by the inner reader are passed through unchanged.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        use DigitState::*;

        // Numeric value of a character that has already been validated as an ASCII hex digit.
        fn nibble(c: char) -> u8 {
            // `to_digit(16)` never yields more than 15, so narrowing to `u8` cannot fail.
            c.to_digit(16)
                .and_then(|value| u8::try_from(value).ok())
                .expect("caller verified that `c` is an ASCII hex digit")
        }

        if buf.is_empty() {
            return Ok(0);
        }

        let mut bytes_read = 0usize;

        // NOTE(review): an invalid character, or an I/O error from the inner reader, still
        // discards any bytes decoded earlier in the same call. Deferring those errors would
        // require storing the pending error on `HexReader`.
        for byte in &mut self.inner {
            let c = char::from(byte?);

            let decoded: Option<u8> = match self.digit_state {
                // Separators are only legal between complete bytes.
                Empty if c.is_whitespace() || c == ',' => None,
                // We've encountered either the first digit or the `0` of `0x`.
                Empty if c == '0' => {
                    self.digit_state = Zero;
                    None
                }
                // Now we know that this hex-encoded byte is going to be `0xHH` rather than `0H`.
                Zero if c == 'x' => {
                    self.digit_state = ZeroX;
                    None
                }
                // Reading the first digit of the hex-encoded byte.
                Empty | ZeroX if c.is_ascii_hexdigit() => {
                    self.digit_state = HasUpperNibble(c);
                    None
                }
                // Reading the second digit of a `0H` pair; the upper nibble is zero.
                Zero if c.is_ascii_hexdigit() => Some(nibble(c)),
                // Reading the second digit of the hex-encoded byte.
                HasUpperNibble(upper) if c.is_ascii_hexdigit() => {
                    Some((nibble(upper) << 4) | nibble(c))
                }
                // Error cases
                _ if c.is_whitespace() => {
                    return Err(Error::new(
                        ErrorKind::InvalidData,
                        format!("unexpected whitespace when digit expected: '{c}'"),
                    ))
                }
                _ => {
                    return Err(Error::new(
                        ErrorKind::InvalidData,
                        format!("not a valid hexadecimal digit: '{c}'"),
                    ))
                }
            };

            if let Some(value) = decoded {
                buf[bytes_read] = value;
                bytes_read += 1;
                self.digit_state = Empty;
                if bytes_read == buf.len() {
                    break;
                }
            }
        }

        // The state is always `Empty` when the loop stops because `buf` is full, so a non-empty
        // state here means the input ended partway through a byte. Report that only when there
        // is nothing to hand back; otherwise return the decoded bytes now and let the next call,
        // which starts in the same state, surface the error.
        if bytes_read == 0 && self.digit_state != Empty {
            return Err(Error::new(
                ErrorKind::InvalidData,
                "found an odd number of hex digits",
            ));
        }

        Ok(bytes_read)
    }
}

#[test]
fn test_read_hex_digits() {
    // Contiguous digit pairs with no separators decode to one byte per pair.
    let source = Cursor::new("00010203");
    let decoded = HexReader::from(source)
        .bytes()
        .collect::<std::io::Result<Vec<u8>>>();
    assert_eq!(vec![0u8, 1, 2, 3], decoded.unwrap())
}

#[test]
fn test_read_hex_digits_with_whitespace() {
    // Spaces, tabs, and both newline styles between pairs are skipped.
    let source = Cursor::new("00 01\n 02 \t \t\t 03 \r\n04");
    let decoded = HexReader::from(source)
        .bytes()
        .collect::<std::io::Result<Vec<u8>>>();
    assert_eq!(vec![0u8, 1, 2, 3, 4], decoded.unwrap())
}

#[test]
fn test_read_hex_digits_with_leading_0x() {
    // Every pair carries a `0x` prefix.
    let source = Cursor::new("0x00 0x01 0x02 0x03 0x04");
    let decoded = HexReader::from(source)
        .bytes()
        .collect::<std::io::Result<Vec<u8>>>();
    assert_eq!(vec![0u8, 1, 2, 3, 4], decoded.unwrap())
}

#[test]
fn test_read_hex_digits_with_commas() {
    // Commas are accepted as separators between pairs.
    let source = Cursor::new("00,01,02,03,04");
    let decoded = HexReader::from(source)
        .bytes()
        .collect::<std::io::Result<Vec<u8>>>();
    assert_eq!(vec![0u8, 1, 2, 3, 4], decoded.unwrap())
}

#[test]
fn test_read_odd_number_of_hex_digits() {
    // Nine digits: the trailing `0` never gets a partner, so decoding must fail.
    let source = Cursor::new("000102030");
    let decoded = HexReader::from(source)
        .bytes()
        .collect::<std::io::Result<Vec<u8>>>();
    assert!(decoded.is_err())
}

#[test]
fn test_read_hex_digits_with_invalid_char() {
    // `Q` is not a hex digit, so decoding must fail when it is reached.
    let source = Cursor::new("000102030Q");
    let decoded = HexReader::from(source)
        .bytes()
        .collect::<std::io::Result<Vec<u8>>>();
    assert!(decoded.is_err())
}
1 change: 1 addition & 0 deletions src/bin/ion/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ mod ansi_codes;
mod auto_decompress;
mod commands;
mod file_writer;
mod hex_reader;
mod input;
mod input_grouping;
mod output;
Expand Down

0 comments on commit 1953628

Please sign in to comment.