From c4a228c2a79f37fa726564082a1e7ebf7473da33 Mon Sep 17 00:00:00 2001 From: Chad Nelson Date: Wed, 23 Sep 2020 08:27:45 -0400 Subject: [PATCH] Allow empty parens in a string to be safely parsed This change allows an empty set of parens, `()`, to be in a string and to be parsed as a token. Prior to this change, an empty set of parens would be parsed as a list with no contents which could not be successfully parsed, but instead resulted in an error as described in #101. This change fixes issue #101. --- lib/parsing_nesting/grammar.rb | 7 ++++++- spec/parsing_nesting/build_tree_spec.rb | 23 +++++++++++++++++++++++ spec/parsing_nesting/consuming_spec.rb | 3 ++- 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/lib/parsing_nesting/grammar.rb b/lib/parsing_nesting/grammar.rb index 658303a..c01a7aa 100644 --- a/lib/parsing_nesting/grammar.rb +++ b/lib/parsing_nesting/grammar.rb @@ -18,6 +18,11 @@ class Grammar < Parslet::Parser paren_list end + rule :empty_paren do + (str('()')) + end + + # Note well: It was tricky to parse the thing we want where you can # have a flat list with boolean operators, but where 'OR' takes precedence. # eg "A AND B OR C AND C" or "A OR B AND C OR D". 
Tricky to parse at all, @@ -47,7 +52,7 @@ class Grammar < Parslet::Parser end rule :token do - match['^ ")('].repeat(1).as(:token) + (match['^ ")('] | empty_paren ).repeat(1).as(:token) end rule :phrase do match('"') >> match['^"'].repeat(1).as(:phrase) >> match('"') diff --git a/spec/parsing_nesting/build_tree_spec.rb b/spec/parsing_nesting/build_tree_spec.rb index c1e33db..af5f84c 100644 --- a/spec/parsing_nesting/build_tree_spec.rb +++ b/spec/parsing_nesting/build_tree_spec.rb @@ -198,6 +198,29 @@ def should_be_not_expression(graph) end end + it "should parse term list with empty parens () " do + should_be_list parse("foo () bar") do |list| + expect(list.length).to eq(3) + expect(list[1].value).to eq('()') + end + end + + it "should parse term list with leading or trailing empty parens () " do + should_be_list parse("() foo ()") do |list| + expect(list.length).to eq(3) + expect(list[0].value).to eq('()') + expect(list[2].value).to eq('()') + end + end + + it "should parse term list with nested parens ()" do + should_be_list parse("(()) foo") do |list| + expect(list.length).to eq(2) + expect(list[0].value).to eq('()') + end + end + + it "should build for a crazy complicated one" do should_be_list parse("mark +twain AND huck OR fun OR ((jim AND river) AND (red -dogs))") do |list| should_be_term list[0], "mark" diff --git a/spec/parsing_nesting/consuming_spec.rb b/spec/parsing_nesting/consuming_spec.rb index 93dc28e..f9777e3 100644 --- a/spec/parsing_nesting/consuming_spec.rb +++ b/spec/parsing_nesting/consuming_spec.rb @@ -29,7 +29,8 @@ "NOT (four five)", "(one two three) OR (four five) AND six", '"foo+bar (baz"', - "(foo bar one AND two) AND (three four ten OR twelve)" + "(foo bar one AND two) AND (three four ten OR twelve)", + "one () two" ].each do |query| it "should consume<<#{query}>>" do expect { @parser.parse(query) }.not_to raise_error