From 49ab423592e74cee5bae945b6553d162d106dbf4 Mon Sep 17 00:00:00 2001 From: Casey Rodarmor Date: Thu, 17 Oct 2019 20:04:54 -0700 Subject: [PATCH] Refactor lexer tests (#498) - Refactor the lexer tests to be more readable, abandoning the previous string-based summary DSL in favor of a more obvious sequence of `TokenKinds` with optional lexemes. The new tests also test that token lexemes are correct. - Move duplicated `unindent` function into a shared crate, `test-utilities`. This new versionless dev-dependency will prevent publishing to crates.io, at least until rust-lang/cargo/pull/7333 makes it into stable. If we publish a new version before then, test-utilities will need to be published to crates.io, so we can depend on it by version. --- .gitignore | 10 +- Cargo.lock | 8 + Cargo.toml | 6 + src/lexer.rs | 1241 ++++++++++++----- src/testing.rs | 7 +- test-utilities/Cargo.toml | 9 + .../mod.rs => test-utilities/src/lib.rs | 5 +- tests/integration.rs | 4 +- tests/interrupts.rs | 4 +- tests/invocation_directory.rs | 5 +- tests/working_directory.rs | 5 +- 11 files changed, 946 insertions(+), 358 deletions(-) create mode 100644 test-utilities/Cargo.toml rename tests/testing/mod.rs => test-utilities/src/lib.rs (96%) diff --git a/.gitignore b/.gitignore index a9b5725132..9f62f0249c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,9 @@ -/target /.vagrant /README.html -/tmp -/fuzz/target -/fuzz/corpus /fuzz/artifacts +/fuzz/corpus +/fuzz/target +/target +/test-utilities/Cargo.lock +/test-utilities/target +/tmp diff --git a/Cargo.lock b/Cargo.lock index 3788dc2658..3b0d79b435 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -219,6 +219,7 @@ dependencies = [ "pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", "target 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "test-utilities 0.0.0", "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -462,6 +463,13 @@ dependencies = [ "redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "test-utilities" +version = "0.0.0" +dependencies = [ + "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "textwrap" version = "0.11.0" diff --git a/Cargo.toml b/Cargo.toml index 935afafcb8..9ee7fea24a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,3 +37,9 @@ features = ["termination"] [dev-dependencies] executable-path = "1" pretty_assertions = "0.6" + +# Until github.com/rust-lang/cargo/pull/7333 makes it into stable, +# this version-less dev-dependency will interfere with publishing +# to crates.io. +[dev-dependencies.test-utilities] +path = "test-utilities" diff --git a/src/lexer.rs b/src/lexer.rs index 8658b15251..049e5fdd32 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -626,366 +626,945 @@ impl<'a> Lexer<'a> { mod tests { use super::*; - fn summary(tokens: &[Token]) -> String { - use TokenKind::*; + use pretty_assertions::assert_eq; - tokens - .iter() - .map(|t| match t.kind { - At => "@", - Backtick => "`", - Colon => ":", - ColonEquals => ":=", - Comma => ",", - Comment => "#", - Dedent => "<", - Eof => ".", - Eol => "$", - Equals => "=", - Indent => ">", - InterpolationEnd => "}", - InterpolationStart => "{", - Line => "^", - Name => "N", - ParenL => "(", - ParenR => ")", - Plus => "+", - StringRaw => "'", - StringCooked => "\"", - Text => "_", - Whitespace => " ", - }) - .collect::>() - .join("") - } - - macro_rules! lex_test { - ($name:ident, $input:expr, $expected:expr $(,)*) => { + macro_rules! test { + { + name: $name:ident, + text: $text:expr, + tokens: ($($kind:ident $(: $lexeme:literal)?),* $(,)?)$(,)? + } => { #[test] fn $name() { - let input = $input; - let expected = $expected; - let tokens = Lexer::lex(input).unwrap(); - let roundtrip = tokens - .iter() - .map(Token::lexeme) - .collect::>() - .join(""); - let actual = summary(&tokens); + let kinds: &[TokenKind] = &[$($kind,)* Eof]; - use pretty_assertions::assert_eq; + let lexemes: &[&str] = &[$(lexeme!($kind $(, $lexeme)?),)* ""]; - assert_eq!(actual, expected, "token summary mismatch"); - - assert_eq!(input, roundtrip, "token round-trip not equal"); + test($text, kinds, lexemes); } - }; - } - - lex_test! { - name, - "foo", - "N.", - } - - lex_test! { - comment, - "# hello", - "#.", - } - - lex_test! { - backtick, - "`echo`", - "`.", - } - - lex_test! { - raw_string, - "'hello'", - "'.", - } - - lex_test! { - cooked_string, - r#""hello""#, - r#""."#, - } - - lex_test! { - export_concatination, - "export foo = 'foo' + 'bar'", - "N N = ' + '.", - } - - lex_test! { - export_complex, - "export foo = ('foo' + 'bar') + `baz`", - "N N = (' + ') + `.", - } - - lex_test! { - eol_linefeed, - "\n", - "$.", - } - - lex_test! { - eol_carriage_return_linefeed, - "\r\n", - "$.", - } - - lex_test! { - indented_line, - "foo:\n a", - "N:$>^_<.", - } - - lex_test! { - indented_block, - r##"foo: - a - b - c -"##, - "N:$>^_$ ^_$ ^_$<.", + } } - lex_test! { - indented_block_followed_by_item, - "foo: - a -b:", - "N:$>^_$ { + $lexeme + }; + { + $kind:ident + } => { + default_lexeme($kind) + } } - lex_test! { - indented_block_followed_by_blank, - "foo: - a + fn test(text: &str, want_kinds: &[TokenKind], want_lexemes: &[&str]) { + let text = testing::unindent(text); -b:", - "N:$>^_$^$^_$<.", - } + let have = Lexer::lex(&text).unwrap(); - lex_test! { - indented_blocks, - " -b: a - @mv a b + let have_kinds = have + .iter() + .map(|token| token.kind) + .collect::>(); -a: - @touch F - @touch a + let have_lexemes = have + .iter() + .map(|token| token.lexeme()) + .collect::>(); -d: c - @rm c + assert_eq!(have_kinds, want_kinds, "Token kind mismatch"); + assert_eq!(have_lexemes, want_lexemes, "Token lexeme mismatch"); -c: b - @mv b c", - "$N: N$>^_$^$^_$ ^_$^$^_$^$^_<.", - } + let mut roundtrip = String::new(); - lex_test! { - interpolation_empty, - "hello:\n echo {{}}", - "N:$>^_{}<.", - } + for lexeme in have_lexemes { + roundtrip.push_str(lexeme); + } - lex_test! { - interpolation_expression, - "hello:\n echo {{`echo hello` + `echo goodbye`}}", - "N:$>^_{` + `}<.", - } + assert_eq!(roundtrip, text, "Roundtrip mismatch"); - lex_test! { - tokenize_names, - "\ -foo -bar-bob -b-bob_asdfAAAA -test123", - "N$N$N$N.", - } + let mut offset = 0; + let mut line = 0; + let mut column = 0; - lex_test! { - tokenize_indented_line, - "foo:\n a", - "N:$>^_<.", - } + for token in have { + assert_eq!(token.offset, offset); + assert_eq!(token.line, line); + assert_eq!(token.lexeme().len(), token.length); + assert_eq!(token.column, column); - lex_test! { - tokenize_indented_block, - r##"foo: - a - b - c -"##, - "N:$>^_$ ^_$ ^_$<.", - } + for c in token.lexeme().chars() { + if c == '\n' { + line += 1; + column = 0; + } else { + column += c.len_utf8(); + } + } - lex_test! { - tokenize_strings, - r#"a = "'a'" + '"b"' + "'c'" + '"d"'#echo hello"#, - r#"N = " + ' + " + '#."#, + offset += token.length; + } } - lex_test! { - tokenize_recipe_interpolation_eol, - "foo: # some comment - {{hello}} -", - "N: #$>^{N}$<.", + fn default_lexeme(kind: TokenKind) -> &'static str { + match kind { + // Fixed lexemes + At => "@", + Colon => ":", + ColonEquals => ":=", + Comma => ",", + Eol => "\n", + Equals => "=", + Indent => " ", + InterpolationEnd => "}}", + InterpolationStart => "{{", + ParenL => "(", + ParenR => ")", + Plus => "+", + Whitespace => " ", + + // Empty lexemes + Line | Dedent | Eof => "", + + // Variable lexemes + Text | StringCooked | StringRaw | Name | Comment | Backtick => { + panic!("Token {:?} has no default lexeme", kind) + } + } } - lex_test! { - tokenize_recipe_interpolation_eof, - "foo: # more comments + test! { + name: name_new, + text: "foo", + tokens: (Name:"foo"), + } + + test! { + name: comment, + text: "# hello", + tokens: (Comment:"# hello"), + } + + test! { + name: backtick, + text: "`echo`", + tokens: (Backtick:"`echo`"), + } + + test! { + name: raw_string, + text: "'hello'", + tokens: (StringRaw:"'hello'"), + } + + test! { + name: cooked_string, + text: "\"hello\"", + tokens: (StringCooked:"\"hello\""), + } + + test! { + name: export_concatination, + text: "export foo = 'foo' + 'bar'", + tokens: ( + Name:"export", + Whitespace, + Name:"foo", + Whitespace, + Equals, + Whitespace, + StringRaw:"'foo'", + Whitespace, + Plus, + Whitespace, + StringRaw:"'bar'", + ) + } + + test! { + name: export_complex, + text: "export foo = ('foo' + 'bar') + `baz`", + tokens: ( + Name:"export", + Whitespace, + Name:"foo", + Whitespace, + Equals, + Whitespace, + ParenL, + StringRaw:"'foo'", + Whitespace, + Plus, + Whitespace, + StringRaw:"'bar'", + ParenR, + Whitespace, + Plus, + Whitespace, + Backtick:"`baz`", + ), + } + + test! { + name: eol_linefeed, + text: "\n", + tokens: (Eol), + } + + test! { + name: eol_carriage_return_linefeed, + text: "\r\n", + tokens: (Eol:"\r\n"), + } + + test! { + name: indented_line, + text: "foo:\n a", + tokens: (Name:"foo", Colon, Eol, Indent:" ", Line, Text:"a", Dedent), + } + + test! { + name: indented_block, + text: " + foo: + a + b + c + ", + tokens: ( + Name:"foo", + Colon, + Eol, + Indent, + Line, + Text:"a", + Eol, + Whitespace:" ", + Line, + Text:"b", + Eol, + Whitespace:" ", + Line, + Text:"c", + Eol, + Dedent, + ) + } + + test! { + name: indented_block_followed_by_item, + text: " + foo: + a + b: + ", + tokens: ( + Name:"foo", + Colon, + Eol, + Indent, + Line, + Text:"a", + Eol, + Dedent, + Name:"b", + Colon, + Eol, + ) + } + + test! { + name: indented_block_followed_by_blank, + text: " + foo: + a + + b: + ", + tokens: ( + Name:"foo", + Colon, + Eol, + Indent:" ", + Line, + Text:"a", + Eol, + Line, + Eol, + Dedent, + Name:"b", + Colon, + Eol, + ), + } + + test! { + name: indented_line_containing_unpaired_carriage_return, + text: "foo:\n \r \n", + tokens: ( + Name:"foo", + Colon, + Eol, + Indent:" ", + Line, + Text:"\r ", + Eol, + Dedent, + ), + } + + test! { + name: indented_blocks, + text: " + b: a + @mv a b + + a: + @touch F + @touch a + + d: c + @rm c + + c: b + @mv b c + ", + tokens: ( + Name:"b", + Colon, + Whitespace, + Name:"a", + Eol, + Indent, + Line, + Text:"@mv a b", + Eol, + Line, + Eol, + Dedent, + Name:"a", + Colon, + Eol, + Indent, + Line, + Text:"@touch F", + Eol, + Whitespace:" ", + Line, + Text:"@touch a", + Eol, + Line, + Eol, + Dedent, + Name:"d", + Colon, + Whitespace, + Name:"c", + Eol, + Indent, + Line, + Text:"@rm c", + Eol, + Line, + Eol, + Dedent, + Name:"c", + Colon, + Whitespace, + Name:"b", + Eol, + Indent, + Line, + Text:"@mv b c", + Eol, + Dedent + ), + } + + test! { + name: interpolation_empty, + text: "hello:\n echo {{}}", + tokens: ( + Name:"hello", + Colon, + Eol, + Indent:" ", + Line, + Text:"echo ", + InterpolationStart, + InterpolationEnd, + Dedent, + ), + } + + test! { + name: interpolation_expression, + text: "hello:\n echo {{`echo hello` + `echo goodbye`}}", + tokens: ( + Name:"hello", + Colon, + Eol, + Indent:" ", + Line, + Text:"echo ", + InterpolationStart, + Backtick:"`echo hello`", + Whitespace, + Plus, + Whitespace, + Backtick:"`echo goodbye`", + InterpolationEnd, + Dedent, + ), + } + + test! { + name: tokenize_names, + text: " + foo + bar-bob + b-bob_asdfAAAA + test123 + ", + tokens: ( + Name:"foo", + Eol, + Name:"bar-bob", + Eol, + Name:"b-bob_asdfAAAA", + Eol, + Name:"test123", + Eol, + ), + } + + test! { + name: tokenize_indented_line, + text: "foo:\n a", + tokens: ( + Name:"foo", + Colon, + Eol, + Indent:" ", + Line, + Text:"a", + Dedent, + ), + } + + test! { + name: tokenize_indented_block, + text: " + foo: + a + b + c + ", + tokens: ( + Name:"foo", + Colon, + Eol, + Indent, + Line, + Text:"a", + Eol, + Whitespace:" ", + Line, + Text:"b", + Eol, + Whitespace:" ", + Line, + Text:"c", + Eol, + Dedent, + ), + } + + test! { + name: tokenize_strings, + text: r#"a = "'a'" + '"b"' + "'c'" + '"d"'#echo hello"#, + tokens: ( + Name:"a", + Whitespace, + Equals, + Whitespace, + StringCooked:"\"'a'\"", + Whitespace, + Plus, + Whitespace, + StringRaw:"'\"b\"'", + Whitespace, + Plus, + Whitespace, + StringCooked:"\"'c'\"", + Whitespace, + Plus, + Whitespace, + StringRaw:"'\"d\"'", + Comment:"#echo hello", + ) + } + + test! { + name: tokenize_recipe_interpolation_eol, + text: " + foo: # some comment + {{hello}} + ", + tokens: ( + Name:"foo", + Colon, + Whitespace, + Comment:"# some comment", + Eol, + Indent:" ", + Line, + InterpolationStart, + Name:"hello", + InterpolationEnd, + Eol, + Dedent + ), + } + + test! { + name: tokenize_recipe_interpolation_eof, + text: "foo: # more comments {{hello}} # another comment ", - "N: #$>^{N}$<#$.", - } - - lex_test! { - tokenize_recipe_complex_interpolation_expression, - "foo: #lol\n {{a + b + \"z\" + blarg}}", - "N: #$>^{N + N + \" + N}<.", - } - - lex_test! { - tokenize_recipe_multiple_interpolations, - "foo:,#ok\n {{a}}0{{b}}1{{c}}", - "N:,#$>^{N}_{N}_{N}<.", - } - - lex_test! { - tokenize_junk, - "bob - -hello blah blah blah : a b c #whatever + tokens: ( + Name:"foo", + Colon, + Whitespace, + Comment:"# more comments", + Eol, + Indent:" ", + Line, + InterpolationStart, + Name:"hello", + InterpolationEnd, + Eol, + Dedent, + Comment:"# another comment", + Eol, + ), + } + + test! { + name: tokenize_recipe_complex_interpolation_expression, + text: "foo: #lol\n {{a + b + \"z\" + blarg}}", + tokens: ( + Name:"foo", + Colon, + Whitespace:" ", + Comment:"#lol", + Eol, + Indent:" ", + Line, + InterpolationStart, + Name:"a", + Whitespace, + Plus, + Whitespace, + Name:"b", + Whitespace, + Plus, + Whitespace, + StringCooked:"\"z\"", + Whitespace, + Plus, + Whitespace, + Name:"blarg", + InterpolationEnd, + Dedent, + ), + } + + test! { + name: tokenize_recipe_multiple_interpolations, + text: "foo:,#ok\n {{a}}0{{b}}1{{c}}", + tokens: ( + Name:"foo", + Colon, + Comma, + Comment:"#ok", + Eol, + Indent:" ", + Line, + InterpolationStart, + Name:"a", + InterpolationEnd, + Text:"0", + InterpolationStart, + Name:"b", + InterpolationEnd, + Text:"1", + InterpolationStart, + Name:"c", + InterpolationEnd, + Dedent, + + ), + } + + test! { + name: tokenize_junk, + text: " + bob + + hello blah blah blah : a b c #whatever ", - "N$$N N N N : N N N #$ .", - } - - lex_test! { - tokenize_empty_lines, - " -# this does something -hello: - asdf - bsdf - - csdf - - dsdf # whatever - -# yolo - ", - "$#$N:$>^_$ ^_$^$ ^_$^$ ^_$^$<#$ .", - } - - lex_test! { - tokenize_comment_before_variable, - " -# -A='1' -echo: - echo {{A}} - ", - "$#$N='$N:$>^_{N}$ <.", - } - - lex_test! { - tokenize_interpolation_backticks, - "hello:\n echo {{`echo hello` + `echo goodbye`}}", - "N:$>^_{` + `}<.", - } - - lex_test! { - tokenize_empty_interpolation, - "hello:\n echo {{}}", - "N:$>^_{}<.", - } - - lex_test! { - tokenize_assignment_backticks, - "a = `echo hello` + `echo goodbye`", - "N = ` + `.", - } - - lex_test! { - tokenize_multiple, - " -hello: - a - b - - c - - d - -# hello -bob: - frank - \t", - - "$N:$>^_$ ^_$^$ ^_$^$ ^_$^$<#$N:$>^_$ <.", - } - - lex_test! { - tokenize_comment, - "a:=#", - "N:=#." - } - - lex_test! { - tokenize_comment_with_bang, - "a:=#foo!", - "N:=#." - } - - lex_test! { - tokenize_order, - r" -b: a - @mv a b - -a: - @touch F - @touch a - -d: c - @rm c - -c: b - @mv b c", - "$N: N$>^_$^$^_$ ^_$^$^_$^$^_<.", - } - - lex_test! { - tokenize_parens, - r"((())) )abc(+", - "((())) )N(+.", - } - - lex_test! { - crlf_newline, - "#\r\n#asdf\r\n", - "#$#$.", - } - - lex_test! { - multiple_recipes, - "a:\n foo\nb:", - "N:$>^_$ Justfile { } } -pub(crate) fn tempdir() -> tempfile::TempDir { - tempfile::Builder::new() - .prefix("just-test-tempdir") - .tempdir() - .expect("failed to create temporary directory") -} +pub(crate) use test_utilities::{tempdir, unindent}; macro_rules! error_test { ( diff --git a/test-utilities/Cargo.toml b/test-utilities/Cargo.toml new file mode 100644 index 0000000000..aa92a1d496 --- /dev/null +++ b/test-utilities/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "test-utilities" +version = "0.0.0" +authors = ["Casey Rodarmor "] +edition = "2018" +publish = false + +[dependencies] +tempfile = "3" diff --git a/tests/testing/mod.rs b/test-utilities/src/lib.rs similarity index 96% rename from tests/testing/mod.rs rename to test-utilities/src/lib.rs index 40fd9c96f8..3e906cc404 100644 --- a/tests/testing/mod.rs +++ b/test-utilities/src/lib.rs @@ -1,12 +1,11 @@ -pub(crate) fn tempdir() -> tempfile::TempDir { +pub fn tempdir() -> tempfile::TempDir { tempfile::Builder::new() .prefix("just-test-tempdir") .tempdir() .expect("failed to create temporary directory") } -#[allow(dead_code)] -pub(crate) fn unindent(text: &str) -> String { +pub fn unindent(text: &str) -> String { // find line start and end indices let mut lines = Vec::new(); let mut start = 0; diff --git a/tests/integration.rs b/tests/integration.rs index d660982902..bca05e0c30 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -1,5 +1,3 @@ -mod testing; - use std::{ env, fs, io::Write, @@ -11,7 +9,7 @@ use std::{ use executable_path::executable_path; use libc::{EXIT_FAILURE, EXIT_SUCCESS}; use pretty_assertions::assert_eq; -use testing::{tempdir, unindent}; +use test_utilities::{tempdir, unindent}; /// Instantiate an integration test. macro_rules! integration_test { diff --git a/tests/interrupts.rs b/tests/interrupts.rs index 8eddb90876..0184cf80c4 100644 --- a/tests/interrupts.rs +++ b/tests/interrupts.rs @@ -1,14 +1,12 @@ -mod testing; - #[cfg(unix)] mod unix { - use super::testing::tempdir; use executable_path::executable_path; use std::{ fs, process::Command, time::{Duration, Instant}, }; + use test_utilities::tempdir; fn kill(process_id: u32) { unsafe { diff --git a/tests/invocation_directory.rs b/tests/invocation_directory.rs index f390f0a77b..a7bfc07ea2 100644 --- a/tests/invocation_directory.rs +++ b/tests/invocation_directory.rs @@ -1,10 +1,7 @@ -mod testing; - use std::{fs, path::Path, process, str}; use executable_path::executable_path; - -use testing::tempdir; +use test_utilities::tempdir; #[cfg(unix)] fn to_shell_path(path: &Path) -> String { diff --git a/tests/working_directory.rs b/tests/working_directory.rs index e9004343fe..ecab99df6e 100644 --- a/tests/working_directory.rs +++ b/tests/working_directory.rs @@ -1,10 +1,7 @@ -mod testing; - use std::{error::Error, fs, process::Command}; use executable_path::executable_path; - -use testing::tempdir; +use test_utilities::tempdir; /// Test that just runs with the correct working directory when invoked with /// `--justfile` but not `--working-directory`