From ced4ffbaecaaa777f3f0b00ee412df20bc91be10 Mon Sep 17 00:00:00 2001
From: Claire Foster
Date: Thu, 16 May 2024 11:32:53 +1000
Subject: [PATCH] Change AST for iterations to use `iteration` kind
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The `=` node which has traditionally been used for iteration specifications in `for` loops and generators doesn't have normal assignment semantics. Let's consider

    for x in xs
        body
    end

which has been parsed as `(for (= x xs) (block body))`.

Problems:
* The iteration does create a binding for `x`, but not to the expression on the right hand side of the `=`.
* The user may use `in` or `∈` in the source code rather than `=`. The parser still uses a `=` node for consistency but this only emphasizes that there's something a bit weird going on.

So this use of `=` is not assignment; merely assignment-like.

In this change, we use a new iteration kind instead of `=` so the `for` loop parses as `(for (iteration x xs) (block body))` instead.

We also reuse `iteration` to replace the `cartesian_iterator` head - cartesian iteration is just an iteration with an even number of children greater than two. Being less specific here with the naming (omitting the "cartesian") seems appropriate in trying to represent the surface syntax; cartesian semantics come later in lowering and a macro may decide to do something else with the iteration spec.

These changes are also used for generators.

After the changes we have tree structures such as

    julia> parsestmt(SyntaxNode, "for i in is body end")
    line:col│ tree                                   │ file_name
       1:1  │[for]
       1:4  │  [iteration]
       1:5  │    i
       1:10 │    is
       1:12 │  [block]
       1:13 │    body

    julia> parsestmt(SyntaxNode, "for i in is, j in js body end")
    line:col│ tree                                   │ file_name
       1:1  │[for]
       1:4  │  [iteration]
       1:5  │    i
       1:10 │    is
       1:14 │    j
       1:19 │    js
       1:21 │  [block]
       1:22 │    body

    julia> parsestmt(SyntaxNode, "[a for i = is, j = js if z]")
    line:col│ tree                                   │ file_name
       1:1  │[comprehension]
       1:2  │  [generator]
       1:2  │    a
       1:7  │    [filter]
       1:7  │      [iteration]
       1:8  │        i
       1:12 │        is
       1:16 │        j
       1:20 │        js
       1:26 │      z

    julia> parsestmt(SyntaxNode, "[a for i = is for j = js if z]")
    line:col│ tree                                   │ file_name
       1:1  │[comprehension]
       1:2  │  [generator]
       1:2  │    a
       1:7  │    [iteration]
       1:8  │      i
       1:12 │      is
       1:18 │    [filter]
       1:18 │      [iteration]
       1:19 │        j
       1:23 │        js
       1:29 │      z
---
 docs/src/reference.md | 20 +++++++-------
 src/expr.jl           | 38 ++++++++++++++++++--------
 src/kinds.jl          |  2 +-
 src/parser.jl         | 43 ++++++++++++++----------------
 test/parser.jl        | 62 +++++++++++++++++++++----------------------
 5 files changed, 89 insertions(+), 76 deletions(-)

diff --git a/docs/src/reference.md b/docs/src/reference.md
index 2ae2cef1..a98662ee 100644
--- a/docs/src/reference.md
+++ b/docs/src/reference.md
@@ -76,7 +76,7 @@ class of tokenization errors and lets the parser deal with them.
* Using `try catch else finally end` is parsed with `K"catch"` `K"else"` and `K"finally"` children to avoid the awkwardness of the optional child nodes in the `Expr` representation (#234) * The dotted import path syntax as in `import A.b.c` is parsed with a `K"importpath"` kind rather than `K"."`, because a bare `A.b.c` has a very different nested/quoted expression representation (#244) * We use flags rather than child nodes to represent the difference between `struct` and `mutable struct`, `module` and `baremodule` (#220) -* Multiple iterations within the header of a `for`, as in `for a=as, b=bs body end` are represented with a `cartesian_iterator` head rather than a `block`, as these lists of iterators are neither semantically nor syntactically a sequence of statements. Unlike other uses of `block` (see also generators). +* Iterations are represented with the `iteration` head rather than `=` within the header of a `for`. Thus `for i=is ; body end` parses to `(for (iteration i is) (block body))`. Cartesian iteration as in `for a=as, b=bs body end` is represented with a longer `iteration` block rather than a `block` containing `=` because these lists of iterators are neither semantically nor syntactically a sequence of statements, unlike other uses of `block`. Generators also use the `iteration` head - see information on that below. ## More detail on tree differences @@ -90,8 +90,10 @@ mean ``` for x in xs -for y in ys - push!(xy, collection) + for y in ys + push!(xy, collection) + end +end ``` so the `xy` prefix is in the *body* of the innermost for loop. Following this, @@ -112,8 +114,8 @@ source order. However, our green tree is strictly source-ordered, so we must deviate from the Julia AST. 
We deal with this by grouping cartesian products of iterators -(separated by commas) within `cartesian_iterator` blocks as in `for` loops, and -use the presence of multiple iterator blocks rather than the `flatten` head to +(separated by commas) within `iteration` blocks as in `for` loops, and +use the presence of multiple `iteration` blocks rather than the `flatten` head to distinguish flattened iterators. The nested flattens and generators of `Expr` forms are reconstructed later. In this form the tree structure resembles the source much more closely. For example, `(xy for x in xs for y in ys)` is parsed as @@ -121,8 +123,8 @@ source much more closely. For example, `(xy for x in xs for y in ys)` is parsed ``` (generator xy - (= x xs) - (= y ys)) + (iteration x xs) + (iteration y ys)) ``` And the cartesian iteration `(xy for x in xs, y in ys)` is parsed as @@ -130,9 +132,7 @@ And the cartesian iteration `(xy for x in xs, y in ys)` is parsed as ``` (generator xy - (cartesian_iterator - (= x xs) - (= y ys))) + (iteration x xs y ys)) ``` ### Whitespace trivia inside strings diff --git a/src/expr.jl b/src/expr.jl index 4ca0be02..8a7cc8fb 100644 --- a/src/expr.jl +++ b/src/expr.jl @@ -193,6 +193,22 @@ function _extract_do_lambda!(args) end end +function _convert_iteration(wrap_iters::Function, ex) + if @isexpr(ex, :iteration) + if length(ex.args) == 2 + wrap_iters(false, Expr(:(=), ex.args[1], ex.args[2])) + else + blk_args = [] + for i = 1:2:length(ex.args) + push!(blk_args, Expr(:(=), ex.args[i], ex.args[i+1])) + end + wrap_iters(true, blk_args) + end + else + wrap_iters(false, ex) + end +end + # Convert internal node of the JuliaSyntax parse tree to an Expr function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args) k = kind(head) @@ -296,9 +312,8 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, # Move parameters blocks to args[2] _reorder_parameters!(args, 2) elseif k == K"for" - a1 = args[1] - if @isexpr(a1, 
:cartesian_iterator) - args[1] = Expr(:block, a1.args...) + args[1] = _convert_iteration(args[1]) do is_multi_iter, iter + is_multi_iter ? Expr(:block, iter...) : iter end # Add extra line number node for the `end` of the block. This may seem # useless but it affects code coverage. @@ -356,10 +371,9 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, gen = args[1] for j = length(args):-1:2 aj = args[j] - if @isexpr(aj, :cartesian_iterator) - gen = Expr(:generator, gen, aj.args...) - else - gen = Expr(:generator, gen, aj) + gen = _convert_iteration(args[j]) do is_multi_iter, iter + is_multi_iter ? Expr(:generator, gen, iter...) : + Expr(:generator, gen, iter) end if j < length(args) # Additional `for`s flatten the inner generator @@ -371,10 +385,12 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, @assert length(args) == 2 iterspec = args[1] outargs = Any[args[2]] - if @isexpr(iterspec, :cartesian_iterator) - append!(outargs, iterspec.args) - else - push!(outargs, iterspec) + _convert_iteration(args[1]) do is_multi_iter, iter + if is_multi_iter + append!(outargs, iter) + else + push!(outargs, iter) + end end args = outargs elseif k == K"nrow" || k == K"ncat" diff --git a/src/kinds.jl b/src/kinds.jl index 6de2f26a..d68a141b 100644 --- a/src/kinds.jl +++ b/src/kinds.jl @@ -912,7 +912,7 @@ const _kind_names = # Comprehensions "generator" "filter" - "cartesian_iterator" + "iteration" "comprehension" "typed_comprehension" # Container for a single statement/atom plus any trivia and errors diff --git a/src/parser.jl b/src/parser.jl index 34666b59..e3fc5c73 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -1075,7 +1075,7 @@ function parse_where_chain(ps0::ParseState, mark) # x where {T,S} ==> (where x (braces T S)) # Also various nonsensical forms permitted # x where {T S} ==> (where x (bracescat (row T S))) - # x where {y for y in ys} ==> (where x (braces (generator y (= y ys)))) + # x where {y for y in ys} 
==> (where x (braces (generator y (iteration y ys)))) m = position(ps) bump(ps, TRIVIA_FLAG) ckind, cflags = parse_cat(ps, K"}", ps.end_symbol) @@ -1577,7 +1577,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # T[x y] ==> (typed_hcat T x y) # T[x ; y] ==> (typed_vcat T x y) # T[a b; c d] ==> (typed_vcat T (row a b) (row c d)) - # T[x for x in xs] ==> (typed_comprehension T (generator x (= x xs))) + # T[x for x in xs] ==> (typed_comprehension T (generator x (iteration x xs))) #v1.8: T[a ; b ;; c ; d] ==> (typed_ncat-2 T (nrow-1 a b) (nrow-1 c d)) outk = ckind == K"vect" ? K"ref" : ckind == K"hcat" ? K"typed_hcat" : @@ -1797,8 +1797,8 @@ function parse_resword(ps::ParseState) bump_closing_token(ps, K"end") emit(ps, mark, K"while") elseif word == K"for" - # for x in xs end ==> (for (= x xs) (block)) - # for x in xs, y in ys \n a \n end ==> (for (cartesian_iterator (= x xs) (= y ys)) (block a)) + # for x in xs end ==> (for (iteration x xs) (block)) + # for x in xs, y in ys \n a \n end ==> (for (iteration x xs y ys) (block a)) bump(ps, TRIVIA_FLAG) parse_iteration_specs(ps) parse_block(ps) @@ -2620,11 +2620,11 @@ function parse_iteration_spec(ps::ParseState) if peek_behind(ps).orig_kind == K"outer" if peek_skip_newline_in_gen(ps) in KSet"= in ∈" # Not outer keyword - # outer = rhs ==> (= outer rhs) - # outer <| x = rhs ==> (= (call-i outer <| x) rhs) + # outer = rhs ==> (iteration outer rhs) + # outer <| x = rhs ==> (iteration (call-i outer <| x) rhs) else - # outer i = rhs ==> (= (outer i) rhs) - # outer (x,y) = rhs ==> (= (outer (tuple-p x y)) rhs) + # outer i = rhs ==> (iteration (outer i) rhs) + # outer (x,y) = rhs ==> (iteration (outer (tuple-p x y)) rhs) reset_node!(ps, position(ps), kind=K"outer", flags=TRIVIA_FLAG) parse_pipe_lt(ps) emit(ps, mark, K"outer") @@ -2640,7 +2640,6 @@ function parse_iteration_spec(ps::ParseState) end # Or try parse_pipe_lt ??? 
end - emit(ps, mark, K"=") end # Parse an iteration spec, or a comma separate list of such for for loops and @@ -2648,9 +2647,7 @@ end function parse_iteration_specs(ps::ParseState) mark = position(ps) n_iters = parse_comma_separated(ps, parse_iteration_spec) - if n_iters > 1 - emit(ps, mark, K"cartesian_iterator") - end + emit(ps, mark, K"iteration") end # flisp: parse-space-separated-exprs @@ -2700,19 +2697,19 @@ end # Parse generators # # We represent generators quite differently from `Expr`: -# * Cartesian products of iterators are grouped within cartesian_iterator +# * Iteration variables and their iterators are grouped within K"iteration" # nodes, as in the short form of `for` loops. # * The `generator` kind is used for both cartesian and flattened generators # -# (x for a in as for b in bs) ==> (parens (generator x (= a as) (= b bs))) -# (x for a in as, b in bs) ==> (parens (generator x (cartesian_iterator (= a as) (= b bs)))) -# (x for a in as, b in bs if z) ==> (parens (generator x (filter (cartesian_iterator (= a as) (= b bs)) z))) +# (x for a in as for b in bs) ==> (parens (generator x (iteration a as) (iteration b bs))) +# (x for a in as, b in bs) ==> (parens (generator x (iteration a as b bs))) +# (x for a in as, b in bs if z) ==> (parens (generator x (filter (iteration a as b bs) z))) # # flisp: parse-generator function parse_generator(ps::ParseState, mark) while (t = peek_token(ps); kind(t) == K"for") if !preceding_whitespace(t) - # ((x)for x in xs) ==> (parens (generator (parens x) (error) (= x xs))) + # ((x)for x in xs) ==> (parens (generator (parens x) (error) (iteration x xs))) bump_invisible(ps, K"error", TRIVIA_FLAG, error="Expected space before `for` in generator") end @@ -2720,7 +2717,7 @@ function parse_generator(ps::ParseState, mark) iter_mark = position(ps) parse_iteration_specs(ps) if peek(ps) == K"if" - # (x for a in as if z) ==> (parens (generator x (filter (= a as) z))) + # (x for a in as if z) ==> (parens (generator x (filter 
(iteration a as) z))) bump(ps, TRIVIA_FLAG) parse_cond(ps) emit(ps, iter_mark, K"filter") @@ -2731,7 +2728,7 @@ end # flisp: parse-comprehension function parse_comprehension(ps::ParseState, mark, closer) - # [x for a in as] ==> (comprehension (generator x a in as)) + # [x for a in as] ==> (comprehension (generator x (iteration a as))) ps = ParseState(ps, whitespace_newline=true, space_sensitive=false, end_symbol=false) @@ -2981,8 +2978,8 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) # [x ==> (vect x (error-t)) parse_vect(ps, closer) elseif k == K"for" - # [x for a in as] ==> (comprehension (generator x (= a as))) - # [x \n\n for a in as] ==> (comprehension (generator x (= a as))) + # [x for a in as] ==> (comprehension (generator x (iteration a as))) + # [x \n\n for a in as] ==> (comprehension (generator x (iteration a as))) parse_comprehension(ps, mark, closer) else # [x y] ==> (hcat x y) @@ -3138,8 +3135,8 @@ function parse_brackets(after_parse::Function, continue elseif k == K"for" # Generator syntax - # (x for a in as) ==> (parens (generator x (= a as))) - # (x \n\n for a in as) ==> (parens (generator x (= a as))) + # (x for a in as) ==> (parens (generator x (iteration a as))) + # (x \n\n for a in as) ==> (parens (generator x (iteration a as))) parse_generator(ps, mark) else # Error - recovery done when consuming closing_kind diff --git a/test/parser.jl b/test/parser.jl index 3ac3ed36..1540a0cb 100644 --- a/test/parser.jl +++ b/test/parser.jl @@ -299,7 +299,7 @@ tests = [ "x where \n {T}" => "(where x (braces T))" "x where {T,S}" => "(where x (braces T S))" "x where {T S}" => "(where x (bracescat (row T S)))" - "x where {y for y in ys}" => "(where x (braces (generator y (= y ys))))" + "x where {y for y in ys}" => "(where x (braces (generator y (iteration y ys))))" "x where T" => "(where x T)" "x where \n T" => "(where x T)" "x where T<:S" => "(where x (<: T S))" @@ -388,7 +388,7 @@ tests = [ "T[x y]" => "(typed_hcat T x y)" "T[x ; y]" => 
"(typed_vcat T x y)" "T[a b; c d]" => "(typed_vcat T (row a b) (row c d))" - "T[x for x in xs]" => "(typed_comprehension T (generator x (= x xs)))" + "T[x for x in xs]" => "(typed_comprehension T (generator x (iteration x xs)))" ((v=v"1.8",), "T[a ; b ;; c ; d]") => "(typed_ncat-2 T (nrow-1 a b) (nrow-1 c d))" # Dotted forms @@ -460,8 +460,8 @@ tests = [ "while cond body end" => "(while cond (block body))" "while x < y \n a \n b \n end" => "(while (call-i x < y) (block a b))" # for - "for x in xs end" => "(for (= x xs) (block))" - "for x in xs, y in ys \n a \n end" => "(for (cartesian_iterator (= x xs) (= y ys)) (block a))" + "for x in xs end" => "(for (iteration x xs) (block))" + "for x in xs, y in ys \n a \n end" => "(for (iteration x xs y ys) (block a))" # let "let x=1\n end" => "(let (block (= x 1)) (block))" "let x=1 ; end" => "(let (block (= x 1)) (block))" @@ -669,16 +669,16 @@ tests = [ "import A..." => "(import (importpath A ..))" "import A; B" => "(import (importpath A))" ], - JuliaSyntax.parse_iteration_spec => [ - "i = rhs" => "(= i rhs)" - "i in rhs" => "(= i rhs)" - "i ∈ rhs" => "(= i rhs)" - "i = 1:10" => "(= i (call-i 1 : 10))" - "(i,j) in iter" => "(= (tuple-p i j) iter)" - "outer = rhs" => "(= outer rhs)" - "outer <| x = rhs" => "(= (call-i outer <| x) rhs)" - "outer i = rhs" => "(= (outer i) rhs)" - "outer (x,y) = rhs" => "(= (outer (tuple-p x y)) rhs)" + JuliaSyntax.parse_iteration_specs => [ + "i = rhs" => "(iteration i rhs)" + "i in rhs" => "(iteration i rhs)" + "i ∈ rhs" => "(iteration i rhs)" + "i = 1:10" => "(iteration i (call-i 1 : 10))" + "(i,j) in iter" => "(iteration (tuple-p i j) iter)" + "outer = rhs" => "(iteration outer rhs)" + "outer <| x = rhs" => "(iteration (call-i outer <| x) rhs)" + "outer i = rhs" => "(iteration (outer i) rhs)" + "outer (x,y) = rhs" => "(iteration (outer (tuple-p x y)) rhs)" ], JuliaSyntax.parse_paren => [ # Tuple syntax with commas @@ -706,8 +706,8 @@ tests = [ "(x)" => "(parens x)" "(a...)" => "(parens (... 
a))" # Generators - "(x for a in as)" => "(parens (generator x (= a as)))" - "(x \n\n for a in as)" => "(parens (generator x (= a as)))" + "(x for a in as)" => "(parens (generator x (iteration a as)))" + "(x \n\n for a in as)" => "(parens (generator x (iteration a as)))" # Range parsing in parens "(1:\n2)" => "(parens (call-i 1 : 2))" "(1:2)" => "(parens (call-i 1 : 2))" @@ -775,19 +775,19 @@ tests = [ "[x \n, ]" => "(vect x)" "[x" => "(vect x (error-t))" "[x \n\n ]" => "(vect x)" - "[x for a in as]" => "(comprehension (generator x (= a as)))" - "[x \n\n for a in as]" => "(comprehension (generator x (= a as)))" + "[x for a in as]" => "(comprehension (generator x (iteration a as)))" + "[x \n\n for a in as]" => "(comprehension (generator x (iteration a as)))" # parse_generator - "(x for a in as for b in bs)" => "(parens (generator x (= a as) (= b bs)))" - "(x for a in as, b in bs)" => "(parens (generator x (cartesian_iterator (= a as) (= b bs))))" - "(x for a in as, b in bs if z)" => "(parens (generator x (filter (cartesian_iterator (= a as) (= b bs)) z)))" - "(x for a in as, b in bs for c in cs, d in ds)" => "(parens (generator x (cartesian_iterator (= a as) (= b bs)) (cartesian_iterator (= c cs) (= d ds))))" - "(x for a in as for b in bs if z)" => "(parens (generator x (= a as) (filter (= b bs) z)))" - "(x for a in as if z for b in bs)" => "(parens (generator x (filter (= a as) z) (= b bs)))" - "[x for a = as for b = bs if cond1 for c = cs if cond2]" => "(comprehension (generator x (= a as) (filter (= b bs) cond1) (filter (= c cs) cond2)))" - "[x for a = as if begin cond2 end]" => "(comprehension (generator x (filter (= a as) (block cond2))))" - "[(x)for x in xs]" => "(comprehension (generator (parens x) (error-t) (= x xs)))" - "(x for a in as if z)" => "(parens (generator x (filter (= a as) z)))" + "(x for a in as for b in bs)" => "(parens (generator x (iteration a as) (iteration b bs)))" + "(x for a in as, b in bs)" => "(parens (generator x (iteration a as b 
bs)))" + "(x for a in as, b in bs if z)" => "(parens (generator x (filter (iteration a as b bs) z)))" + "(x for a in as, b in bs for c in cs, d in ds)" => "(parens (generator x (iteration a as b bs) (iteration c cs d ds)))" + "(x for a in as for b in bs if z)" => "(parens (generator x (iteration a as) (filter (iteration b bs) z)))" + "(x for a in as if z for b in bs)" => "(parens (generator x (filter (iteration a as) z) (iteration b bs)))" + "[x for a = as for b = bs if cond1 for c = cs if cond2]" => "(comprehension (generator x (iteration a as) (filter (iteration b bs) cond1) (filter (iteration c cs) cond2)))" + "[x for a = as if begin cond2 end]" => "(comprehension (generator x (filter (iteration a as) (block cond2))))" + "[(x)for x in xs]" => "(comprehension (generator (parens x) (error-t) (iteration x xs)))" + "(x for a in as if z)" => "(parens (generator x (filter (iteration a as) z)))" # parse_vect "[x, y]" => "(vect x y)" "[x, y]" => "(vect x y)" @@ -875,8 +875,8 @@ tests = [ "\"hi\$(\"ho\")\"" => "(string \"hi\" (parens (string \"ho\")))" "\"\$(x,y)\"" => "(string (parens (error x y)))" "\"\$(x;y)\"" => "(string (parens (error x y)))" - "\"\$(x for y in z)\"" => "(string (parens (error (generator x (= y z)))))" - "\"\$((x for y in z))\"" => "(string (parens (parens (generator x (= y z)))))" + "\"\$(x for y in z)\"" => "(string (parens (error (generator x (iteration y z)))))" + "\"\$((x for y in z))\"" => "(string (parens (parens (generator x (iteration y z)))))" "\"\$(xs...)\"" => "(string (parens (... 
xs)))" "\"a \$foo b\"" => "(string \"a \" foo \" b\")" "\"\$var\"" => "(string var)" @@ -995,7 +995,7 @@ parsestmt_test_specs = [ ":+'y'" => "(juxtapose (call-post (quote-: +) ') (call-post y '))" # unary subtype ops and newlines "a +\n\n<:" => "(call-i a + <:)" - "for\n\n<:" => "(for (= <: (error (error-t))) (block (error)) (error-t))" + "for\n\n<:" => "(for (iteration <: (error (error-t))) (block (error)) (error-t))" # Empty character consumes trailing ' delimiter (ideally this could be # tested above but we don't require the input stream to be consumed in the # unit tests there.