From 6119a75dd7a06ef5c1991614484d681ea471d355 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikl=C3=B3s=20Koren?= <miklos.koren@gmail.com>
Date: Sun, 21 Jul 2024 11:56:30 +0200
Subject: [PATCH 01/15] implement reshape wide (#172)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

no tests yet

```julia
julia> df
4×3 DataFrame
 Row │ i      j      x
     │ Int64  Int64  Int64
─────┼─────────────────────
   1 │     1      1      1
   2 │     2      1      2
   3 │     1      2      3
   4 │     2      2      4

julia> @with df @reshape wide x, i(i) j(j)
Kezdi.jl> @reshape_wide x, i(i) j(j)

2×3 DataFrame
 Row │ i      x1      x2
     │ Int64  Int64?  Int64?
─────┼───────────────────────
   1 │     1       1       3
   2 │     2       2       4
 ```
---
 src/Kezdi.jl    |  2 +-
 src/commands.jl | 12 ++++++++++++
 src/macros.jl   | 15 +++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/src/Kezdi.jl b/src/Kezdi.jl
index 9348046..c5657bb 100644
--- a/src/Kezdi.jl
+++ b/src/Kezdi.jl
@@ -2,7 +2,7 @@
 Kezdi.jl is a Julia package for data manipulation and analysis. It is inspired by Stata, but it is written in Julia, which makes it faster and more flexible. It is designed to be used in the Julia REPL, but it can also be used in Jupyter notebooks or in scripts.
 """
 module Kezdi
-export @generate, @replace, @egen, @collapse, @keep, @drop, @summarize, @regress, @use, @tabulate, @count, @sort, @order, @list, @head, @tail, @names, @rename, @clear, @describe
+export @generate, @replace, @egen, @collapse, @keep, @drop, @summarize, @regress, @use, @tabulate, @count, @sort, @order, @list, @head, @tail, @names, @rename, @clear, @describe, @reshape
 
 export getdf, setdf, display_and_return, keep_only_values, rowcount, distinct, cond
 
diff --git a/src/commands.jl b/src/commands.jl
index faac0f8..0599a83 100644
--- a/src/commands.jl
+++ b/src/commands.jl
@@ -1,6 +1,18 @@
 # use multiple dispatch to generate code 
 rewrite(command::Command) = rewrite(Val(command.command), command)
 
+function rewrite(::Val{:reshape_wide}, command::Command)
+    gc = generate_command(command; options=[:variables], allowed=[:i, :j])
+    (; local_copy, target_df, setup, teardown, arguments, options) = gc
+    i = get_option(command, :i)[1] |> replace_column_references
+    j = get_option(command, :j)[1] |> replace_column_references
+    var = collect(arguments)[1] |> replace_column_references
+    quote
+        $setup
+        unstack($local_copy, $i, $j, $var, renamecols = x -> Symbol($var, x)) |> $teardown |> setdf
+    end |> esc
+end
+
 function rewrite(::Val{:rename}, command::Command)
     gc = generate_command(command; options=[:variables], allowed=[])
     (; local_copy, target_df, setup, teardown, arguments, options) = gc
diff --git a/src/macros.jl b/src/macros.jl
index f95f1d0..bbeedf5 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -198,3 +198,18 @@ macro describe(exprs...)
     :describe |> parse(exprs)  |> rewrite
 end
 
+"""
+    @reshape long y1 y2 ... i(var) j(var) 
+    @reshape wide y1 y2 ... i(var) j(var)
+
+Reshape the data frame from wide to long or from long to wide format. The variables `y1`, `y2`, etc. are the variables to be reshaped. The `i(var)` and `j(var)` are the variables that define the row and column indices in the reshaped data frame.
+"""
+macro reshape(exprs...)
+    if exprs[1] == :long
+        :reshape_long |> parse(exprs[2:end]) |> rewrite
+    elseif exprs[1] == :wide
+        :reshape_wide |> parse(exprs[2:end]) |> rewrite
+    else
+        ArgumentError("Invalid option $(exprs[1]). Correct syntax:\n@reshape long y1 y2 ... i(var) j(var)\n@reshape wide y1 y2 ... i(var) j(var)") |> throw
+    end
+end
\ No newline at end of file

From a79bc3aa0e845ec23c25e82842eb667e1ee45626 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikl=C3=B3s=20Koren?= <miklos.koren@gmail.com>
Date: Fri, 26 Jul 2024 17:55:40 +0200
Subject: [PATCH 02/15] bugfix: double vectorization in replace

- fixes #182
- new test added
- 405/405 tests pass
- version bumped to 0.5.1 (the Project.toml change itself is in PATCH 03/15)
---
 src/commands.jl  | 2 +-
 test/commands.jl | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/commands.jl b/src/commands.jl
index 0599a83..467621e 100644
--- a/src/commands.jl
+++ b/src/commands.jl
@@ -43,7 +43,7 @@ function rewrite(::Val{:replace}, command::Command)
     target_column = get_LHS(command.arguments[1])
     LHS, RHS = split_assignment(arguments[1])
     third_vector = gensym()
-    bitmask = build_bitmask(local_copy, vectorize_function_calls(replace_column_references(local_copy, command.condition)))
+    bitmask = build_bitmask(local_copy, command.condition)
     quote
         !($target_column in names(getdf())) && ArgumentError("Column \"$($target_column)\" does not exist in $(names(getdf()))") |> throw
         $setup
diff --git a/test/commands.jl b/test/commands.jl
index de7517a..b100762 100644
--- a/test/commands.jl
+++ b/test/commands.jl
@@ -112,6 +112,11 @@ end
     @testset "Error handling" begin
         @test_throws Exception @with df @replace y = 1
     end
+
+    @testset "Double vectorization bug (#182)" begin
+        positive(x) = x > 0
+        @test (@with DataFrame(x=1:4, y=5:8) @replace y = 0 @if positive(x - 2)).y == [5, 6, 0, 0]
+    end
 end
 
 @testset "Missing values" begin

From c07d23b7ec0053a0cff79ea1dec496344faaec5b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikl=C3=B3s=20Koren?= <miklos.koren@gmail.com>
Date: Fri, 26 Jul 2024 17:55:40 +0200
Subject: [PATCH 03/15] bugfix: double vectorization in replace

- fixes #182
- new test added
- 405/405 tests pass
- version bumped to 0.5.1
---
 Project.toml     | 2 +-
 src/commands.jl  | 2 +-
 test/commands.jl | 5 +++++
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 15406e8..b29eec1 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Kezdi"
 uuid = "48308a23-c29e-446c-b4c0-d9446a767439"
 authors = ["Miklos Koren <miklos.koren@gmail.com>", "Gergely Attila Kiss <corra971407@gmail.com>"]
-version = "0.5.0"
+version = "0.5.1"
 
 [deps]
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
diff --git a/src/commands.jl b/src/commands.jl
index 0599a83..467621e 100644
--- a/src/commands.jl
+++ b/src/commands.jl
@@ -43,7 +43,7 @@ function rewrite(::Val{:replace}, command::Command)
     target_column = get_LHS(command.arguments[1])
     LHS, RHS = split_assignment(arguments[1])
     third_vector = gensym()
-    bitmask = build_bitmask(local_copy, vectorize_function_calls(replace_column_references(local_copy, command.condition)))
+    bitmask = build_bitmask(local_copy, command.condition)
     quote
         !($target_column in names(getdf())) && ArgumentError("Column \"$($target_column)\" does not exist in $(names(getdf()))") |> throw
         $setup
diff --git a/test/commands.jl b/test/commands.jl
index de7517a..b100762 100644
--- a/test/commands.jl
+++ b/test/commands.jl
@@ -112,6 +112,11 @@ end
     @testset "Error handling" begin
         @test_throws Exception @with df @replace y = 1
     end
+
+    @testset "Double vectorization bug (#182)" begin
+        positive(x) = x > 0
+        @test (@with DataFrame(x=1:4, y=5:8) @replace y = 0 @if positive(x - 2)).y == [5, 6, 0, 0]
+    end
 end
 
 @testset "Missing values" begin

From ec1b71ba87df2ae9a46a96b44f21dfc15e19b39f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikl=C3=B3s=20Koren?= <miklos.koren@gmail.com>
Date: Mon, 29 Jul 2024 22:49:33 +0200
Subject: [PATCH 04/15] Reshape wide for single and multiple vars (WIP)

---
 src/commands.jl | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/commands.jl b/src/commands.jl
index 0845c4f..8472d44 100644
--- a/src/commands.jl
+++ b/src/commands.jl
@@ -6,10 +6,28 @@ function rewrite(::Val{:reshape_wide}, command::Command)
     (; local_copy, target_df, setup, teardown, arguments, options) = gc
     i = get_option(command, :i)[1] |> replace_column_references
     j = get_option(command, :j)[1] |> replace_column_references
-    var = collect(arguments)[1] |> replace_column_references
+    vars = collect(arguments) |> replace_column_references
+    df_list = gensym()
+    combined_df = gensym()
+    #=
+    TODO: 
+    - multiple vqribales
+        - unstack can only do 1 variable at a time
+    - varlist in i
+    =#
+    length(vars) > 1 ?
     quote
         $setup
-        unstack($local_copy, $i, $j, $var, renamecols = x -> Symbol($var, x)) |> $teardown |> setdf
+        $df_list = [unstack($target_df, $i, $j, var, renamecols = x -> Symbol(var, x)) for var in $vars]
+        $combined_df = $df_list[1]
+        for df in $df_list[2:end]
+            $combined_df = innerjoin($combined_df, df, on = $i)
+        end
+        $combined_df |> $teardown |> setdf
+    end |> esc  :
+    quote
+        $setup
+        unstack($target_df, $i, $j, $vars[1], renamecols = x -> Symbol($vars[1], x)) |> $teardown |> setdf
     end |> esc
 end
 

From 4d0fc3bc99545e65abfbe825194d0fc4851381b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikl=C3=B3s=20Koren?= <miklos.koren@gmail.com>
Date: Tue, 30 Jul 2024 08:25:18 +0200
Subject: [PATCH 05/15] Add tests for reshape wide (#172)

- 451/451 pass
---
 src/commands.jl  |  8 ++++++--
 test/commands.jl | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/src/commands.jl b/src/commands.jl
index 8472d44..7a5e125 100644
--- a/src/commands.jl
+++ b/src/commands.jl
@@ -1,9 +1,15 @@
 # use multiple dispatch to generate code 
 rewrite(command::Command) = rewrite(Val(command.command), command)
 
+function rewrite(::Val{:reshape_long}, command::Command)
+    error("@reshape long not implemented yet")
+end
+
 function rewrite(::Val{:reshape_wide}, command::Command)
     gc = generate_command(command; options=[:variables], allowed=[:i, :j])
     (; local_copy, target_df, setup, teardown, arguments, options) = gc
+    get_option(command, :i) isa Nothing && ArgumentError("i() is mandatory. Syntax is @reshape wide y1 y2 ... i(var) j(var)") |> throw
+    get_option(command, :j) isa Nothing && ArgumentError("j() is mandatory. Syntax is @reshape wide y1 y2 ... i(var) j(var)") |> throw
     i = get_option(command, :i)[1] |> replace_column_references
     j = get_option(command, :j)[1] |> replace_column_references
     vars = collect(arguments) |> replace_column_references
@@ -11,8 +17,6 @@ function rewrite(::Val{:reshape_wide}, command::Command)
     combined_df = gensym()
     #=
     TODO: 
-    - multiple vqribales
-        - unstack can only do 1 variable at a time
     - varlist in i
     =#
     length(vars) > 1 ?
diff --git a/test/commands.jl b/test/commands.jl
index ebdbc8e..b77fdb0 100644
--- a/test/commands.jl
+++ b/test/commands.jl
@@ -768,4 +768,43 @@ end
     @use "test.dta", clear
     @test df == getdf()
     try @use "test.dta" @if x<5, clear; catch e; @test e isa LoadError; end
+end
+
+@testset "Reshape wide" begin
+    df = DataFrame(i=[1, 1, 2, 2], j=[1, 2, 1, 2], x=1:4, y=5:8)
+    @testset "Known values" begin
+        df2 = @with df @reshape wide x y, i(i) j(j)
+        @test names(df2) == ["i", "x1", "x2", "y1", "y2"]
+        @test all(df2.x1 .== [1, 3])
+        @test all(df2.x2 .== [2, 4])
+        @test all(df2.y1 .== [5, 7])
+        @test all(df2.y2 .== [6, 8])
+        df2 = @with df @reshape wide x, i(i) j(j)
+        @test names(df2) == ["i", "x1", "x2"]
+        @test all(df2.x1 .== [1, 3])
+        @test all(df2.x2 .== [2, 4])
+        df2 = @with df @reshape wide x, i(j) j(i)
+        @test names(df2) == ["j", "x1", "x2"]
+        @test all(df2.x1 .== [1, 2])
+        @test all(df2.x2 .== [3, 4])
+    end
+
+    @testset "Unbalanced panel" begin
+        df = DataFrame(i=[1, 1, 2, 2, 2], j=[1, 2, 1, 2, 3], x=1:5, y=5:9)
+        df2 = @with df @reshape wide x y, i(i) j(j)
+        @test names(df2) == ["i", "x1", "x2", "x3", "y1", "y2", "y3"]
+        @test all(df2.x1 .== [1, 3])
+        @test all(df2.x2 .== [2, 4])
+        @test all(df2.x3 .=== [missing, 5])
+        @test all(df2.y1 .== [5, 7])
+        @test all(df2.y2 .== [6, 8])
+        @test all(df2.y3 .=== [missing, 9])
+        df2 = @with df @reshape wide x y, i(j) j(i)
+        @test names(df2) == ["j", "x1", "x2", "y1", "y2"]
+        @test all(df2.j .== [1, 2, 3])
+        @test all(df2.x1 .=== [1, 2, missing])
+        @test all(df2.x2 .== [3, 4, 5])
+        @test all(df2.y1 .=== [5, 6, missing])
+        @test all(df2.y2 .== [7, 8, 9])
+    end
 end
\ No newline at end of file

From eb68c2339a45cf77a2cca0f4c7e526fb87fa3f18 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikl=C3=B3s=20Koren?= <miklos.koren@gmail.com>
Date: Tue, 30 Jul 2024 08:34:58 +0200
Subject: [PATCH 06/15] Implement multiple i() variables in `@reshape wide`

- re #172
- update documentation
- tests added
- 458/458 pass
---
 src/commands.jl  |  7 ++-----
 src/macros.jl    |  6 ++++--
 test/commands.jl | 12 ++++++++++++
 3 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/commands.jl b/src/commands.jl
index 7a5e125..bdfd524 100644
--- a/src/commands.jl
+++ b/src/commands.jl
@@ -10,15 +10,12 @@ function rewrite(::Val{:reshape_wide}, command::Command)
     (; local_copy, target_df, setup, teardown, arguments, options) = gc
     get_option(command, :i) isa Nothing && ArgumentError("i() is mandatory. Syntax is @reshape wide y1 y2 ... i(var) j(var)") |> throw
     get_option(command, :j) isa Nothing && ArgumentError("j() is mandatory. Syntax is @reshape wide y1 y2 ... i(var) j(var)") |> throw
-    i = get_option(command, :i)[1] |> replace_column_references
+    length(get_option(command, :j)) > 1 && ArgumentError("Only one variable can be specified for j() in @reshape wide") |> throw
+    i = get_option(command, :i) |> replace_column_references
     j = get_option(command, :j)[1] |> replace_column_references
     vars = collect(arguments) |> replace_column_references
     df_list = gensym()
     combined_df = gensym()
-    #=
-    TODO: 
-    - varlist in i
-    =#
     length(vars) > 1 ?
     quote
         $setup
diff --git a/src/macros.jl b/src/macros.jl
index bbeedf5..2d61817 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -199,10 +199,12 @@ macro describe(exprs...)
 end
 
 """
-    @reshape long y1 y2 ... i(var) j(var) 
-    @reshape wide y1 y2 ... i(var) j(var)
+    @reshape long y1 y2 ... i(varlist) j(var) 
+    @reshape wide y1 y2 ... i(varlist) j(var)
 
 Reshape the data frame from wide to long or from long to wide format. The variables `y1`, `y2`, etc. are the variables to be reshaped. The `i(var)` and `j(var)` are the variables that define the row and column indices in the reshaped data frame.
+
+The option `i()` may include multiple variables, like `i(var1, var2, var3)`. The option `j()` must include only one variable.
 """
 macro reshape(exprs...)
     if exprs[1] == :long
diff --git a/test/commands.jl b/test/commands.jl
index b77fdb0..2df535b 100644
--- a/test/commands.jl
+++ b/test/commands.jl
@@ -807,4 +807,16 @@ end
         @test all(df2.y1 .=== [5, 6, missing])
         @test all(df2.y2 .== [7, 8, 9])
     end
+
+    @testset "Multiple i variables" begin
+        df = DataFrame(i1=[1, 1, 2, 2], i2=[0, 0, 0, 1], j=[1, 2, 1, 2], x=1:4, y=5:8)
+        df2 = @with df @reshape wide x y, i(i1, i2) j(j)
+        @test names(df2) == ["i1", "i2", "x1", "x2", "y1", "y2"]
+        @test all(df2.i1 .== [1, 2, 2])
+        @test all(df2.i2 .== [0, 0, 1])
+        @test all(df2.x1 .=== [1, 3, missing])
+        @test all(df2.x2 .=== [2, missing, 4])
+        @test all(df2.y1 .=== [5, 7, missing])
+        @test all(df2.y2 .=== [6, missing, 8])
+    end
 end
\ No newline at end of file

From 72878936686a69d2d1e4736610790a50c8adee7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikl=C3=B3s=20Koren?= <miklos.koren@gmail.com>
Date: Thu, 19 Sep 2024 08:26:14 +0200
Subject: [PATCH 07/15] Add `@generate` and `@replace` to speed benchmarks

---
 docs/examples/benchmark.do | 24 ++++++++++++++++++++----
 docs/examples/benchmark.jl |  6 ++++++
 docs/src/index.md          | 16 +++++++++-------
 3 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/docs/examples/benchmark.do b/docs/examples/benchmark.do
index 2aa8fc1..3ebd9bc 100644
--- a/docs/examples/benchmark.do
+++ b/docs/examples/benchmark.do
@@ -10,11 +10,27 @@ gen i = _n
 set seed 12345
 gen g = floor(runiform() * 100)
 
+timer clear 1
+preserve
+timer on 1
+    generate ln_i = log(i)
+timer off 1
+restore
+timer list 1
+
+timer clear 1
+preserve
+timer on 1
+    replace g = 2*i
+timer off 1
+restore
+timer list 1
+
 * Measure time for mean calculation by group
 timer clear 1
 preserve
 timer on 1
-egen mean_i = mean(i), by(g)
+    egen mean_i = mean(i), by(g)
 timer off 1
 restore
 timer list 1
@@ -23,7 +39,7 @@ timer list 1
 preserve
 timer clear 3
 timer on 3
-collapse (mean) mean_i=i, by(g)
+    collapse (mean) mean_i=i, by(g)
 timer off 3
 restore
 timer list 3
@@ -38,7 +54,7 @@ timer list 5
 * Measure time for summarize
 timer clear 7
 timer on 7
-summarize g, detail
+    summarize g, detail
 timer off 7
 timer list 7
 
@@ -46,7 +62,7 @@ timer list 7
 preserve
 timer clear 9
 timer on 9
-regress i g if g > 50
+    regress i g if g > 50
 timer off 9
 restore
 timer list 9
diff --git a/docs/examples/benchmark.jl b/docs/examples/benchmark.jl
index 7630ec8..bd26e83 100644
--- a/docs/examples/benchmark.jl
+++ b/docs/examples/benchmark.jl
@@ -6,6 +6,12 @@ using Pkg; Pkg.precompile()
 df = DataFrame(i = 1:10_000_000)
 df.g = rand(0:99, nrow(df))
 
+println("Generate")
+@btime @with df @generate ln_i = log(i)
+
+println("Replace")
+@btime @with df @replace g = 2*i
+
 println("Egen")
 @btime @with df  @egen mean_i = mean(i), by(g)
 
diff --git a/docs/src/index.md b/docs/src/index.md
index a6b48e7..24f1afe 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -65,13 +65,15 @@ end
 ### Free and open-source
 ### Speed
 
-| Command      | Stata | Julia 1st run | Julia 2nd run | Speedup |
-| ------------ | ----- | ------------- | ------------- | ------- |
-| `@egen`      | 4.90s | 1.36s         | 0.36s         | 14x     |
-| `@collapse`  | 0.92s | 0.39s         | 0.28s         | 3x      |
-| `@tabulate`  | 2.14s | 0.68s         | 0.09s         | 24x     |
-| `@summarize` | 10.40s | 0.58s         | 0.36s         | 29x     |
-| `@regress`   | 0.89s | 1.95s         | 0.11s         | 8x      |
+| Command      | Stata  | Julia 2nd run | Speedup |
+| ------------ | -----  | ------------- | ------- |
+| `@generate`  | 230ms  | 46ms | 5x |
+| `@replace`   | 232ms  | 43ms | 5x |
+| `@egen`      | 5.00s  | 0.37s         | 13x     |
+| `@collapse`  | 0.94s  | 0.28s         | 3x      |
+| `@tabulate`  | 2.19s  | 0.09s         | 24x     |
+| `@summarize` | 10.56s | 0.35s         | 30x     |
+| `@regress`   | 0.85s  | 0.14s         | 6x      |
 
 See the benchmarking code for [Stata](https://github.com/codedthinking/Kezdi.jl/blob/main/docs/examples/benchmark.do) and [Kezdi.jl](https://github.com/codedthinking/Kezdi.jl/blob/main/docs/examples/benchmark.jl).
 

From ac881ffa3f3316d767d5ac5b8e62668b80fb351b Mon Sep 17 00:00:00 2001
From: "Gergely Attila Kiss (Geri)"
 <47605029+gergelyattilakiss@users.noreply.github.com>
Date: Thu, 7 Nov 2024 16:25:59 +0100
Subject: [PATCH 08/15] Remove stray `end` from test/commands.jl

---
 test/commands.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/commands.jl b/test/commands.jl
index de4d429..ad563e4 100644
--- a/test/commands.jl
+++ b/test/commands.jl
@@ -874,7 +874,6 @@ end
         @test all(df2.y2 .=== [6, missing, 8])
     end
 end
-end
 
 @testset "Save" begin
     @clear

From eb638a1ff163e2978c3bd733d662a293567dad39 Mon Sep 17 00:00:00 2001
From: Gergely Attila Kiss <corra971407@gmail.com>
Date: Fri, 8 Nov 2024 07:59:47 +0100
Subject: [PATCH 09/15] cover reshape long lines with tests

---
 src/macros.jl    | 32 ++++++++++++++++++--------------
 test/commands.jl | 28 +++++++++++++++++++++++++++-
 2 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/src/macros.jl b/src/macros.jl
index dd8fb46..e745202 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -132,14 +132,14 @@ Read the data from the file `filename.dta` and set it as the global data frame.
 """
 macro use(exprs...)
     command = parse(exprs, :use)
-    length(command.arguments) == 1 || ArgumentError("@use takes a single file name as an argument:\n@use \"filename.dta\"[, clear]") |> throw 
+    length(command.arguments) == 1 || ArgumentError("@use takes a single file name as an argument:\n@use \"filename.dta\"[, clear]") |> throw
     # clear is the only permissible option
     isempty(filter(x -> x != :clear, command.options)) || ArgumentError("Invalid options $(string.(command.options)). Correct syntax:\n@use \"filename.dta\"[, clear]") |> throw
     fname = command.arguments[1]
     clear = :clear in command.options
     isnothing(getdf()) || clear || ArgumentError("There is already a global data frame set. If you want to replace it, use the \", clear\" option.") |> throw
 
-    :(println("$(Kezdi.prompt())$($command)\n");Kezdi.use($fname)) |> esc
+    :(println("$(Kezdi.prompt())$($command)\n"); Kezdi.use($fname)) |> esc
 end
 
 """
@@ -154,7 +154,7 @@ macro save(exprs...)
     fname = command.arguments[1]
     replace = :replace in command.options
     ispath(fname) && !replace && ArgumentError("File $fname already exists.") |> throw
-    :(println("$(Kezdi.prompt())$($command)\n");Kezdi.save($fname)) |> esc
+    :(println("$(Kezdi.prompt())$($command)\n"); Kezdi.save($fname)) |> esc
 end
 
 """
@@ -167,7 +167,7 @@ macro append(exprs...)
     length(command.arguments) == 1 || ArgumentError("@append takes a single file name as an argument:\n@append \"filename.dta\"") |> throw
     isnothing(getdf()) && ArgumentError("There is no data frame to append to.") |> throw
     fname = command.arguments[1]
-    :(println("$(Kezdi.prompt())$($command)\n");Kezdi.append($fname)) |> esc
+    :(println("$(Kezdi.prompt())$($command)\n"); Kezdi.append($fname)) |> esc
 end
 """
     @head [n]
@@ -175,7 +175,7 @@ end
 Display the first `n` rows of the data frame. By default, `n` is 5.
 """
 macro head(n=5)
-    :(println("$(Kezdi.prompt())@head $($n)\n");first(getdf(), $n) |> display_and_return) |> esc
+    :(println("$(Kezdi.prompt())@head $($n)\n"); first(getdf(), $n) |> display_and_return) |> esc
 end
 
 """
@@ -184,7 +184,7 @@ end
 Display the last `n` rows of the data frame. By default, `n` is 5.
 """
 macro tail(n=5)
-    :(println("$(Kezdi.prompt())@tail $($n)\n");last(getdf(), $n) |> display_and_return) |> esc
+    :(println("$(Kezdi.prompt())@tail $($n)\n"); last(getdf(), $n) |> display_and_return) |> esc
 end
 
 """
@@ -193,7 +193,7 @@ end
 Display the names of the variables in the data frame.
 """
 macro names()
-    :(println("$(Kezdi.prompt())@names\n");names(getdf()) |> display_and_return) |> esc
+    :(println("$(Kezdi.prompt())@names\n"); names(getdf()) |> display_and_return) |> esc
 end
 
 """
@@ -211,7 +211,7 @@ end
 Clears the global dataframe.
 """
 macro clear()
-    :(println("$(Kezdi.prompt())@clear\n");setdf(nothing))
+    :(println("$(Kezdi.prompt())@clear\n"); setdf(nothing))
 end
 
 """
@@ -220,7 +220,7 @@ end
 Show the names and data types of columns of the data frame. If no variable names given, all are shown. 
 """
 macro describe(exprs...)
-    :describe |> parse(exprs)  |> rewrite
+    :describe |> parse(exprs) |> rewrite
 end
 
 """
@@ -233,19 +233,23 @@ The option `i()` may include multiple variables, like `i(var1, var2, var3)`. The
 """
 macro reshape(exprs...)
     if exprs[1] == :long
-        :reshape_long |> parse(exprs[2:end]) |> rewrite
+        return quote
+            :reshape_long |> parse(exprs[2:end]) |> rewrite
+        end
     elseif exprs[1] == :wide
         :reshape_wide |> parse(exprs[2:end]) |> rewrite
     else
-        ArgumentError("Invalid option $(exprs[1]). Correct syntax:\n@reshape long y1 y2 ... i(var) j(var)\n@reshape wide y1 y2 ... i(var) j(var)") |> throw
+        return quote
+            ArgumentError("Invalid option $(exprs[1]). Correct syntax:\n@reshape long y1 y2 ... i(var) j(var)\n@reshape wide y1 y2 ... i(var) j(var)") |> throw
+        end
     end
-end 
+end
 
 """
     @mvencode y1 y2 [_all] ... [if condition], [mv(value)]
 
-Encode missing values in the variables `y1`, `y2`, etc. in the data frame. If `condition` is provided, the operation is executed only on rows for which the condition is true. If `mv` is provided, the missing values are encoded with the value `value`. By default value is `missing` making no changes on the dataframe. Using `_all` encodes all varibles of the DataFrame.
+Encode missing values in the variables `y1`, `y2`, etc. in the data frame. If `condition` is provided, the operation is executed only on rows for which the condition is true. If `mv` is provided, the missing values are encoded with the value `value`. By default value is `missing` making no changes on the dataframe. Using `_all` encodes all variables of the DataFrame.
 """
 macro mvencode(exprs...)
     :mvencode |> parse(exprs) |> rewrite
-end
\ No newline at end of file
+end
diff --git a/test/commands.jl b/test/commands.jl
index ad563e4..76d9b0f 100644
--- a/test/commands.jl
+++ b/test/commands.jl
@@ -821,7 +821,11 @@ end
     df = DataFrame(x=1:10, y=11:20)
     @use "test.dta", clear
     @test df == getdf()
-    try @use "test.dta" @if x<5, clear; catch e; @test e isa LoadError; end
+    try
+        @use "test.dta" @if x < 5, clear
+    catch e
+        @test e isa UndefVarError
+    end
 end
 
 @testset "Reshape wide" begin
@@ -875,6 +879,28 @@ end
     end
 end
 
+@testset "Reshape long" begin
+    df = DataFrame(i=[1, 1, 2, 2], j=[1, 2, 1, 2], x=1:4, y=5:8)
+    @testset "Known values" begin
+        @test_throws UndefVarError df2 = @with df @reshape long x y, i(i) j(j)
+    end
+
+    @testset "Unbalanced panel" begin
+        df = DataFrame(i=[1, 1, 2, 2, 2], j=[1, 2, 1, 2, 3], x=1:5, y=5:9)
+        @test_throws UndefVarError df2 = @with df @reshape long x y, i(i) j(j)
+    end
+
+    @testset "Multiple i variables" begin
+        df = DataFrame(i1=[1, 1, 2, 2], i2=[0, 0, 0, 1], j=[1, 2, 1, 2], x=1:4, y=5:8)
+        @test_throws UndefVarError df2 = @with df @reshape long x y, i(i1, i2) j(j)
+    end
+end
+
+@testset "Reshape invalid" begin
+    df = DataFrame(i=[1, 1, 2, 2], j=[1, 2, 1, 2], x=1:4, y=5:8)
+    @test_throws UndefVarError df2 = @with df @reshape invalid x y, i(i) j(j)
+end
+
 @testset "Save" begin
     @clear
     df = DataFrame(x=Vector{Any}(1:11), y=11:21)

From 643a6728a38f816ecce56ad7060f1de3805ecfcf Mon Sep 17 00:00:00 2001
From: Gergely Attila Kiss <corra971407@gmail.com>
Date: Tue, 12 Nov 2024 18:33:56 +0100
Subject: [PATCH 10/15] implement reshape long

---
 src/commands.jl | 53 +++++++++++++++++++++++++++++++++++++------------
 1 file changed, 40 insertions(+), 13 deletions(-)

diff --git a/src/commands.jl b/src/commands.jl
index abae987..e38181d 100644
--- a/src/commands.jl
+++ b/src/commands.jl
@@ -2,7 +2,33 @@
 rewrite(command::Command) = rewrite(Val(command.command), command)
 
 function rewrite(::Val{:reshape_long}, command::Command)
-    error("@reshape long not implemented yet")
+    gc = generate_command(command; options=[:variables], allowed=[:i, :j])
+    (; local_copy, target_df, setup, teardown, arguments, options) = gc
+    get_option(command, :i) isa Nothing && ArgumentError("i() is mandatory. Syntax is @reshape long y1 y2 ... i(var) j(var)") |> throw
+    get_option(command, :j) isa Nothing && ArgumentError("j() is mandatory. Syntax is @reshape long y1 y2 ... i(var) j(var)") |> throw
+    length(get_option(command, :j)) > 1 && ArgumentError("Only one variable can be specified for j() in @reshape long") |> throw
+    i = get_option(command, :i) |> replace_column_references
+    j = get_option(command, :j)[1] |> replace_column_references
+    vars = collect(arguments) |> replace_column_references
+    var_lists = gensym()
+    combined_df = gensym()
+    combined_list = gensym()
+    quote
+        $setup
+        $var_lists = [[Symbol(name) for name in names($target_df) if startswith(name, var)] for var in $vars]
+        $combined_list = [stack($target_df, list, view=true) for list in $var_lists]
+        for (i, df) in enumerate(combined_list)
+            df[:, :j] = df[:, :variable] .|> x -> parse(Int, x[length(String(vars[i]))+1:end])
+            rename!(df, :value => String(vars[i]))
+            select!(df, Not(:variable))
+        end
+        $combined_df = $combined_list[1]
+        for df in $combined_list[2:end]
+            $combined_df = leftjoin($combined_df, df, on=[$i, $j], makeunique=true)
+        end
+        $combined_df = select($combined_df, collect(union(intersect(names.($combined_list)...), String.($vars))))
+        $combined_df |> $teardown |> setdf
+    end |> esc
 end
 
 function rewrite(::Val{:reshape_wide}, command::Command)
@@ -19,16 +45,17 @@ function rewrite(::Val{:reshape_wide}, command::Command)
     length(vars) > 1 ?
     quote
         $setup
-        $df_list = [unstack($target_df, $i, $j, var, renamecols = x -> Symbol(var, x)) for var in $vars]
+        $df_list = [unstack($target_df, $i, $j, var, renamecols=x -> Symbol(var, x)) for var in $vars]
+        $combined_df = innerjoin($df_list, on=$i)
         $combined_df = $df_list[1]
         for df in $df_list[2:end]
-            $combined_df = innerjoin($combined_df, df, on = $i)
+            $combined_df = innerjoin($combined_df, df, on=$i)
         end
         $combined_df |> $teardown |> setdf
-    end |> esc  :
+    end |> esc :
     quote
         $setup
-        unstack($target_df, $i, $j, $vars[1], renamecols = x -> Symbol($vars[1], x)) |> $teardown |> setdf
+        unstack($target_df, $i, $j, $vars[1], renamecols=x -> Symbol($vars[1], x)) |> $teardown |> setdf
     end |> esc
 end
 
@@ -88,7 +115,7 @@ function rewrite(::Val{:keep}, command::Command)
     cols = isempty(command.arguments) ? :(:) : :(collect($command.arguments))
     quote
         $setup
-        $target_df[!, $cols]  |> $teardown |> setdf
+        $target_df[!, $cols] |> $teardown |> setdf
     end |> esc
 end
 
@@ -100,7 +127,7 @@ function rewrite(::Val{:drop}, command::Command)
             $setup
             select!($local_copy, Not(collect($(command.arguments)))) |> $teardown |> setdf
         end |> esc
-    end 
+    end
     bitmask = build_bitmask(local_copy, command.condition)
     return quote
         $setup
@@ -143,7 +170,7 @@ function rewrite(::Val{:sort}, command::Command)
 end
 
 function rewrite(::Val{:order}, command::Command)
-    gc = generate_command(command; options = [:variables, :nofunction], allowed=[:desc, :last, :after, :before , :alphabetical])
+    gc = generate_command(command; options=[:variables, :nofunction], allowed=[:desc, :last, :after, :before, :alphabetical])
     (; local_copy, target_df, setup, teardown, arguments, options) = gc
     desc = :desc in get_top_symbol.(options)
     last = :last in get_top_symbol.(options)
@@ -160,7 +187,7 @@ function rewrite(::Val{:order}, command::Command)
     if desc && !alphabetical
         ArgumentError("Cannot use `desc` without `alphabetical` option in @order") |> throw
     end
-    
+
     if before
         var = get_option(command, :before)
     elseif after
@@ -181,7 +208,7 @@ function rewrite(::Val{:order}, command::Command)
         $setup
         $cols = [Symbol(col) for col in names($target_df) if Symbol(col) ∉ $target_cols]
         if $alphabetical
-            $cols = sort($cols, rev = $desc)
+            $cols = sort($cols, rev=$desc)
         end
 
         if $after
@@ -204,7 +231,7 @@ function rewrite(::Val{:order}, command::Command)
             $cols = pushfirst!($cols, $target_cols...)
         end
 
-        $target_df[!, $cols]|> $teardown
+        $target_df[!, $cols] |> $teardown
     end |> esc
 end
 
@@ -224,7 +251,7 @@ function rewrite(::Val{:mvencode}, command::Command)
     coltype = gensym()
     quote
         $setup
-        $valtype = typeof($value)   
+        $valtype = typeof($value)
         for col in $cols
             $coltype = eltype($local_copy[.!($bitmask), col])
             if $valtype != $coltype
@@ -235,4 +262,4 @@ function rewrite(::Val{:mvencode}, command::Command)
         $local_copy[$bitmask, $cols] = mvreplace.($local_copy[$bitmask, $cols], $value)
         $local_copy |> $teardown
     end |> esc
-end
\ No newline at end of file
+end

From e2845d31565663bc371f575dc4aced197187ed8a Mon Sep 17 00:00:00 2001
From: Gergely Attila Kiss <corra971407@gmail.com>
Date: Wed, 13 Nov 2024 12:57:43 +0100
Subject: [PATCH 11/15] implement working version of reshape long

---
 src/commands.jl  | 19 +++++++++----------
 src/functions.jl | 17 +++++++++++------
 src/macros.jl    |  4 +---
 3 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/src/commands.jl b/src/commands.jl
index e38181d..ed3a3b9 100644
--- a/src/commands.jl
+++ b/src/commands.jl
@@ -12,21 +12,20 @@ function rewrite(::Val{:reshape_long}, command::Command)
     vars = collect(arguments) |> replace_column_references
     var_lists = gensym()
     combined_df = gensym()
-    combined_list = gensym()
+    df_list = gensym()
     quote
         $setup
-        $var_lists = [[Symbol(name) for name in names($target_df) if startswith(name, var)] for var in $vars]
-        $combined_list = [stack($target_df, list, view=true) for list in $var_lists]
-        for (i, df) in enumerate(combined_list)
-            df[:, :j] = df[:, :variable] .|> x -> parse(Int, x[length(String(vars[i]))+1:end])
-            rename!(df, :value => String(vars[i]))
+        $var_lists = [[Symbol(name) for name in names($target_df) if startswith(name, String(var))] for var in $vars]
+        $df_list = [stack($target_df, list) for list in $var_lists]
+        for (i, df) in enumerate($df_list)
+            df[!, $j] = df[:, :variable] .|> x -> parse(Int, x[length(String($vars[i]))+1:end])
+            rename!(df, :value => String($vars[i]))
             select!(df, Not(:variable))
         end
-        $combined_df = $combined_list[1]
-        for df in $combined_list[2:end]
-            $combined_df = leftjoin($combined_df, df, on=[$i, $j], makeunique=true)
+        $combined_df = $df_list[1]
+        for df in $df_list[2:end]
+            $combined_df = innerjoin($combined_df, df, on=[$i..., $j], makeunique=true)
         end
-        $combined_df = select($combined_df, collect(union(intersect(names.($combined_list)...), String.($vars))))
         $combined_df |> $teardown |> setdf
     end |> esc
 end
diff --git a/src/functions.jl b/src/functions.jl
index 3a2a502..435dc33 100644
--- a/src/functions.jl
+++ b/src/functions.jl
@@ -4,19 +4,19 @@ save(fname::AbstractString) = writestat(fname, getdf())
 function append(fname::AbstractString)
     ispath(fname) || ArgumentError("File $fname does not exist.") |> throw
     _, ext = splitext(fname)
-    if ext in [".dta", ".sav", ".por", ".sas7bdat", ".xpt"]
+    if ext in [".dta", ".sav", ".por", ".sas7bdat", ".xpt"]
         df = readstat(fname) |> DataFrame
     else
         df = CSV.read(fname, DataFrame)
     end
     cdf = getdf()
     cdf, df = create_cols(cdf, df)
-    df = vcat(cdf,df)
+    df = vcat(cdf, df)
     setdf(df)
 end
 
 function append(df::DataFrame)
-    cdf, df  = create_cols(getdf(), df)
+    cdf, df = create_cols(getdf(), df)
     setdf(vcat(cdf, df))
 end
 
@@ -49,7 +49,7 @@ getdf() = _global_dataframe
 
 Set the global data frame.
 """
-setdf(df::Union{AbstractDataFrame, Nothing}) = global _global_dataframe = isnothing(df) ? nothing : copy(df)
+setdf(df::Union{AbstractDataFrame,Nothing}) = global _global_dataframe = isnothing(df) ? nothing : copy(df)
 display_and_return(x) = (display(x); x)
 
 """
@@ -80,7 +80,7 @@ function summarize(df::AbstractDataFrame, column::Symbol)::Summarize
     skewness_val = skewness(data)
     # julia reports excess kurtosis, so we add 3 to get the kurtosis
     kurtosis_val = 3.0 + kurtosis(data)
-    
+
     percentiles = [1, 5, 10, 25, 50, 75, 90, 95, 99]
     percentiles_values = quantile(data, percentiles ./ 100; alpha=0.5, beta=0.5)
 
@@ -149,4 +149,9 @@ function _describe(df::AbstractDataFrame, cols::Vector{Symbol}=Symbol[])
     table[!, [:variable, :eltype]]
 end
 
-mvreplace(x, y) = ismissing(x) ? y : x
\ No newline at end of file
+"""
+    mvreplace(x, y)
+
+Return `y` if `x` is `missing`, otherwise return `x`. To apply it elementwise to a vector, broadcast it as `mvreplace.(x, y)`. This function mimics `ismissing(x) ? y : x`, which cannot be broadcast directly.
+"""
+mvreplace(x, y) = ismissing(x) ? y : x
diff --git a/src/macros.jl b/src/macros.jl
index e745202..e36dd3c 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -233,9 +233,7 @@ The option `i()` may include multiple variables, like `i(var1, var2, var3)`. The
 """
 macro reshape(exprs...)
     if exprs[1] == :long
-        return quote
-            :reshape_long |> parse(exprs[2:end]) |> rewrite
-        end
+        :reshape_long |> parse(exprs[2:end]) |> rewrite
     elseif exprs[1] == :wide
         :reshape_wide |> parse(exprs[2:end]) |> rewrite
     else

From 0b9cc37731b86e64d2da2569497266e7aeec16a2 Mon Sep 17 00:00:00 2001
From: Gergely Attila Kiss <corra971407@gmail.com>
Date: Wed, 13 Nov 2024 13:58:45 +0100
Subject: [PATCH 12/15] add test for reshape long

---
 README.md        |  6 +++---
 src/commands.jl  |  8 ++++----
 test/commands.jl | 25 +++++++++++++++++++------
 3 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index 45ba810..7f2b1e4 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ It imports and reexports [CSV](https://csv.juliadata.org/stable/), [DataFrames](
 
 ## Getting started
 
-> `Kezdi.jl` is currently in beta. We have more than 400 unit tests and a large code coverage. [![Coverage](https://codecov.io/gh/codedthinking/Kezdi.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/codedthinking/Kezdi.jl) The package, however, is not guaranteed to be bug-free. If you encounter any issues, please report them as a [GitHub issue](https://github.com/codedthinking/Kezdi.jl/issues/new).
+> `Kezdi.jl` is currently in beta. We have more than 500 unit tests and a large code coverage. [![Coverage](https://codecov.io/gh/codedthinking/Kezdi.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/codedthinking/Kezdi.jl) The package, however, is not guaranteed to be bug-free. If you encounter any issues, please report them as a [GitHub issue](https://github.com/codedthinking/Kezdi.jl/issues/new).
 >
 > If you would like to receive updates on the package, please star the repository on GitHub and sign up for [email notifications here](https://relentless-producer-1210.ck.page/62d7ebb237).
 
@@ -161,7 +161,7 @@ If you need to apply a function to individual elements of a column, you need to
 @generate n_words = length.(words)
 ```
 
-> Here, `words` becomes a vector of vectors, where each element is a vector of words in the corresponding `Model` string. The function `legth.` will operate on each cell in `words`, counting the number of words in each `Model` string. By contrast, `length(words)` would return the number of elements in the `words` vector, which is the number of rows in the DataFrame.
+> Here, `words` becomes a vector of vectors, where each element is a vector of words in the corresponding `Model` string. The function `length.` will operate on each cell in `words`, counting the number of words in each `Model` string. By contrast, `length(words)` would return the number of elements in the `words` vector, which is the number of rows in the DataFrame.
 
 ### The `@if` condition
 Almost every command can be followed by an `@if` condition that filters the data frame. The command will only be executed on the subset of rows for which the condition evaluates to `true`. The condition can use any combination of column names and functions.
@@ -221,4 +221,4 @@ Inspiration for the package came from [Tidier.jl](https://tidierorg.github.io/Ti
 
 The package is built on top of [DataFrames.jl](https://dataframes.juliadata.org/stable/), [FreqTables.jl](https://github.com/nalimilan/FreqTables.jl) and [FixedEffectModels.jl](https://github.com/FixedEffects/FixedEffectModels.jl). The `@with` function relies on [Chain.jl](https://github.com/jkrumbiegel/Chain.jl) by Julius Krumbiegel.
 
-The package is named after [Gabor Kezdi](https://kezdigabor.life/), a Hungarian economist who has made significant contributions to [teaching data analysis](https://gabors-data-analysis.com/).
\ No newline at end of file
+The package is named after [Gabor Kezdi](https://kezdigabor.life/), a Hungarian economist who has made significant contributions to [teaching data analysis](https://gabors-data-analysis.com/).
diff --git a/src/commands.jl b/src/commands.jl
index ed3a3b9..53d57de 100644
--- a/src/commands.jl
+++ b/src/commands.jl
@@ -17,15 +17,16 @@ function rewrite(::Val{:reshape_long}, command::Command)
         $setup
         $var_lists = [[Symbol(name) for name in names($target_df) if startswith(name, String(var))] for var in $vars]
         $df_list = [stack($target_df, list) for list in $var_lists]
-        for (i, df) in enumerate($df_list)
-            df[!, $j] = df[:, :variable] .|> x -> parse(Int, x[length(String($vars[i]))+1:end])
-            rename!(df, :value => String($vars[i]))
+        for (n, df) in enumerate($df_list)
+            df[!, $j] = df[:, :variable] .|> x -> Base.parse(Int, x[length(String($vars[n]))+1:end])
+            rename!(df, :value => String($vars[n]))
             select!(df, Not(:variable))
         end
         $combined_df = $df_list[1]
         for df in $df_list[2:end]
             $combined_df = innerjoin($combined_df, df, on=[$i..., $j], makeunique=true)
         end
+        $combined_df = select!($combined_df, collect(union(intersect(names.($df_list)...), String.($vars))))
         $combined_df |> $teardown |> setdf
     end |> esc
 end
@@ -45,7 +46,6 @@ function rewrite(::Val{:reshape_wide}, command::Command)
     quote
         $setup
         $df_list = [unstack($target_df, $i, $j, var, renamecols=x -> Symbol(var, x)) for var in $vars]
-        $combined_df = innerjoin($df_list, on=$i)
         $combined_df = $df_list[1]
         for df in $df_list[2:end]
             $combined_df = innerjoin($combined_df, df, on=$i)
diff --git a/test/commands.jl b/test/commands.jl
index 76d9b0f..2d6b1ff 100644
--- a/test/commands.jl
+++ b/test/commands.jl
@@ -880,19 +880,32 @@ end
 end
 
 @testset "Reshape long" begin
-    df = DataFrame(i=[1, 1, 2, 2], j=[1, 2, 1, 2], x=1:4, y=5:8)
+    df = DataFrame(i=[1, 1, 2, 2], x1=1:4, x2=5:8)
     @testset "Known values" begin
-        @test_throws UndefVarError df2 = @with df @reshape long x y, i(i) j(j)
+        df2 = @with df @reshape long x, i(i) j(j)
+        @test names(df2) == ["i", "x", "j"]
+        @test all(df2.i .== [1, 1, 2, 2, 1, 1, 2, 2])
+        @test all(df2.j .== [1, 1, 1, 1, 2, 2, 2, 2])
+        @test all(df2.x .== [1, 2, 3, 4, 5, 6, 7, 8])
     end
 
     @testset "Unbalanced panel" begin
-        df = DataFrame(i=[1, 1, 2, 2, 2], j=[1, 2, 1, 2, 3], x=1:5, y=5:9)
-        @test_throws UndefVarError df2 = @with df @reshape long x y, i(i) j(j)
+        df = DataFrame(i=[1, 1, 2, 2, 2], x1=1:5, x2=[missing, 7, missing, 9, 10], y1=5:9, y2=[10, missing, 12, missing, missing])
+        df2 = @with df @reshape long x y, i(i) j(j)
+        @test names(df2) == ["i", "j", "x", "y"]
+        @test all(df2.j .== [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
+        @test all(df2.x .=== [1, 2, 1, 2, 3, 4, 5, 3, 4, 5, 3, 4, 5, missing, 7, missing, 7, missing, 9, 10, missing, 9, 10, missing, 9, 10])
+        @test all(df2.y .=== [5, 5, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, missing, missing, 12, 12, 12, missing, missing, missing, missing, missing, missing])
     end
 
     @testset "Multiple i variables" begin
-        df = DataFrame(i1=[1, 1, 2, 2], i2=[0, 0, 0, 1], j=[1, 2, 1, 2], x=1:4, y=5:8)
-        @test_throws UndefVarError df2 = @with df @reshape long x y, i(i1, i2) j(j)
+        df = DataFrame(i1=[1, 1, 2, 2], i2=[0, 0, 0, 1], x1=1:4, x2=5:8)
+        df2 = @with df @reshape long x, i(i1, i2) j(j)
+        @test names(df2) == ["i1", "i2", "x", "j"]
+        @test all(df2.i1 .== [1, 1, 2, 2, 1, 1, 2, 2])
+        @test all(df2.i2 .== [0, 0, 0, 1, 0, 0, 0, 1])
+        @test all(df2.j .== [1, 1, 1, 1, 2, 2, 2, 2])
+        @test all(df2.x .== [1, 2, 3, 4, 5, 6, 7, 8])
     end
 end
 

From fc286ce5a0357fa39ce663815e67542e4cb2ea0e Mon Sep 17 00:00:00 2001
From: Gergely Attila Kiss <corra971407@gmail.com>
Date: Wed, 13 Nov 2024 14:02:26 +0100
Subject: [PATCH 13/15] bump project version

---
 Project.toml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Project.toml b/Project.toml
index 26e9454..93c4555 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Kezdi"
 uuid = "48308a23-c29e-446c-b4c0-d9446a767439"
 authors = ["Miklos Koren <miklos.koren@gmail.com>", "Gergely Attila Kiss <corra971407@gmail.com>"]
-version = "0.5.2"
+version = "0.5.3"
 
 [deps]
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
@@ -14,7 +14,7 @@ FixedEffectModels = "9d5cd8c9-2029-5cab-9928-427838db53e3"
 FreqTables = "da1fdf0e-e0ff-5433-a45f-9bb5ff651cb1"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
-Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
+Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
 RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
 ReadStatTables = "52522f7a-9570-4e34-8ac6-c005c74d4b84"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
@@ -32,7 +32,7 @@ FixedEffectModels = "1"
 FreqTables = "0.4"
 InteractiveUtils = "1"
 Logging = "1"
-Missings = "1"
+Missings = "1"
 RDatasets = "0.7"
 ReadStatTables = "0.3"
 Reexport = "1"

From cfd447bbec5d76b281aa1806c5323aafb8da4b90 Mon Sep 17 00:00:00 2001
From: Gergely Attila Kiss <corra971407@gmail.com>
Date: Wed, 13 Nov 2024 14:43:14 +0100
Subject: [PATCH 14/15] update documentation and macros.jl to mimic the doc
 structure

---
 docs/src/index.md |  26 ++++-
 src/macros.jl     | 265 +++++++++++++++++++++++-----------------------
 2 files changed, 159 insertions(+), 132 deletions(-)

diff --git a/docs/src/index.md b/docs/src/index.md
index 5110ebc..abe681a 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -108,6 +108,10 @@ setdf
 @use
 ```
 
+```@docs
+@save
+```
+
 ```@docs
 getdf
 ```
@@ -132,6 +136,9 @@ getdf
 @clear
 ```
 
+```@docs
+@describe
+```
 ### Filtering columns and rows
 ```@docs
 @keep
@@ -154,6 +161,10 @@ getdf
 @replace
 ```
 
+```@docs
+@mvencode
+```
+
 ```@docs
 @egen
 ```
@@ -166,6 +177,17 @@ getdf
 @sort
 ```
 
+```@docs
+@order
+```
+
+```@docs
+@reshape
+```
+
+```@docs
+@append
+```
 
 ### Summarizing and analyzing data
 ```@docs
@@ -278,7 +300,7 @@ If you need to apply a function to individual elements of a column, you need to
 ```
 
 !!! tip "Note: `length(words)` vs `length.(words)`" 
-    Here, `words` becomes a vector of vectors, where each element is a vector of words in the corresponding `Model` string. The function `legth.` will operate on each cell in `words`, counting the number of words in each `Model` string. By contrast, `length(words)` would return the number of elements in the `words` vector, which is the number of rows in the DataFrame.
+    Here, `words` becomes a vector of vectors, where each element is a vector of words in the corresponding `Model` string. The function `length.` will operate on each cell in `words`, counting the number of words in each `Model` string. By contrast, `length(words)` would return the number of elements in the `words` vector, which is the number of rows in the DataFrame.
 
 ### The `@if` condition
 Almost every command can be followed by an `@if` condition that filters the data frame. The command will only be executed on the subset of rows for which the condition evaluates to `true`. The condition can use any combination of column names and functions.
@@ -470,4 +492,4 @@ Inspiration for the package came from [Tidier.jl](https://tidierorg.github.io/Ti
 
 The package is built on top of [DataFrames.jl](https://dataframes.juliadata.org/stable/), [FreqTables.jl](https://github.com/nalimilan/FreqTables.jl) and [FixedEffectModels.jl](https://github.com/FixedEffects/FixedEffectModels.jl). The `@with` function relies on [Chain.jl](https://github.com/jkrumbiegel/Chain.jl) by Julius Krumbiegel.
 
-The package is named after [Gabor Kezdi](https://kezdigabor.life/), a Hungarian economist who has made significant contributions to [teaching data analysis](https://gabors-data-analysis.com/).
\ No newline at end of file
+The package is named after [Gabor Kezdi](https://kezdigabor.life/), a Hungarian economist who has made significant contributions to [teaching data analysis](https://gabors-data-analysis.com/).
diff --git a/src/macros.jl b/src/macros.jl
index e36dd3c..3fe7ed4 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -3,251 +3,256 @@ macro mockmacro(exprs...)
     parse(exprs, command)
 end
 
+### Setting and inspecting the global data frame
 """
-    @keep y1 y2 ... [@if condition]
+    @use "filename.dta", [clear]
 
-Keep only the variables `y1`, `y2`, etc. in `df`. If `condition` is provided, only the rows for which the condition is true are kept.  
+Read the data from the file `filename.dta` and set it as the global data frame. If there is already a global data frame, `@use` will throw an error unless the `clear` option is provided.
 """
-macro keep(exprs...)
-    :keep |> parse(exprs) |> rewrite
+macro use(exprs...)
+    command = parse(exprs, :use)
+    length(command.arguments) == 1 || ArgumentError("@use takes a single file name as an argument:\n@use \"filename.dta\"[, clear]") |> throw
+    # clear is the only permissible option
+    isempty(filter(x -> x != :clear, command.options)) || ArgumentError("Invalid options $(string.(command.options)). Correct syntax:\n@use \"filename.dta\"[, clear]") |> throw
+    fname = command.arguments[1]
+    clear = :clear in command.options
+    isnothing(getdf()) || clear || ArgumentError("There is already a global data frame set. If you want to replace it, use the \", clear\" option.") |> throw
+
+    :(println("$(Kezdi.prompt())$($command)\n"); Kezdi.use($fname)) |> esc
 end
 
 """
-    @drop y1 y2 ... 
-or
-    @drop [@if condition]    
+    @save "filename.dta", [replace]
 
-Drop the variables `y1`, `y2`, etc. from `df`. If `condition` is provided, the rows for which the condition is true are dropped.
+Save the global data frame to the file `filename.dta`. If the file already exists, the `replace` option must be provided.
 """
-macro drop(exprs...)
-    :drop |> parse(exprs) |> rewrite
+macro save(exprs...)
+    command = parse(exprs, :save)
+    length(command.arguments) == 1 || ArgumentError("@save takes a single file name as an argument:\n@save \"filename.dta\"") |> throw
+    isnothing(getdf()) && ArgumentError("There is no data frame to save.") |> throw
+    fname = command.arguments[1]
+    replace = :replace in command.options
+    ispath(fname) && !replace && ArgumentError("File $fname already exists.") |> throw
+    :(println("$(Kezdi.prompt())$($command)\n"); Kezdi.save($fname)) |> esc
 end
 
 """
-    @generate y = expr [@if condition]
+    @names
 
-Create a new variable `y` in `df` by evaluating `expr`. If `condition` is provided, the operation is executed only on rows for which the condition is true. When the condition is false, the variable will be missing. 
+Display the names of the variables in the data frame.
 """
-macro generate(exprs...)
-    :generate |> parse(exprs) |> rewrite
+macro names()
+    :(println("$(Kezdi.prompt())@names\n"); names(getdf()) |> display_and_return) |> esc
 end
 
 """
-    @replace y = expr [@if condition]
 
-Replace the values of `y` in `df` with the result of evaluating `expr`. If `condition` is provided, the operation is executed only on rows for which the condition is true. When the condition is false, the variable will be left unchanged.
+    @list [y1 y2...] [@if condition]
+
+Display the entire data frame or the rows for which the condition is true. If variable names are provided, only the variables in the list are displayed.
 """
-macro replace(exprs...)
-    :replace |> parse(exprs) |> rewrite
+macro list(exprs...)
+    :list |> parse(exprs) |> rewrite
 end
 
 """
-    @egen y1 = expr1 y2 = expr2 ... [@if condition], [by(group1, group2, ...)]
+    @head [n]
 
-Generate new variables in `df` by evaluating expressions `expr1`, `expr2`, etc. If `condition` is provided, the operation is executed only on rows for which the condition is true. When the condition is false, the variables will be missing. If `by` is provided, the operation is executed by group.
+Display the first `n` rows of the data frame. By default, `n` is 5.
 """
-macro egen(exprs...)
-    :egen |> parse(exprs) |> rewrite
+macro head(n=5)
+    :(println("$(Kezdi.prompt())@head $($n)\n"); first(getdf(), $n) |> display_and_return) |> esc
 end
 
 """
-    @collapse y1 = expr1 y2 = expr2 ... [@if condition], [by(group1, group2, ...)]
+    @tail [n]
 
-Collapse `df` by evaluating expressions `expr1`, `expr2`, etc. If `condition` is provided, the operation is executed only on rows for which the condition is true. If `by` is provided, the operation is executed by group.
+Display the last `n` rows of the data frame. By default, `n` is 5.
 """
-macro collapse(exprs...)
-    :collapse |> parse(exprs) |> rewrite
+macro tail(n=5)
+    :(println("$(Kezdi.prompt())@tail $($n)\n"); last(getdf(), $n) |> display_and_return) |> esc
 end
 
 """
-    @summarize y [@if condition]
+    @clear
 
-Summarize the variable `y` in `df`. If `condition` is provided, the operation is executed only on rows for which the condition is true.
+Clears the global dataframe.
 """
-macro summarize(exprs...)
-    :summarize |> parse(exprs) |> rewrite
+macro clear()
+    :(println("$(Kezdi.prompt())@clear\n"); setdf(nothing))
 end
 
 """
-    @regress y x1 x2 ... [@if condition], [robust] [cluster(var1, var2, ...)]
-
-Estimate a regression model in `df` with dependent variable `y` and independent variables `x1`, `x2`, etc. If `condition` is provided, the operation is executed only on rows for which the condition is true. If `robust` is provided, robust standard errors are calculated. If `cluster` is provided, clustered standard errors are calculated.
+    @describe [y1] [y2]...
 
-The regression is limited to rows for which all variables are values. Missing values, infinity, and NaN are automatically excluded.
+Show the names and data types of columns of the data frame. If no variable names given, all are shown. 
 """
-macro regress(exprs...)
-    :regress |> parse(exprs) |> rewrite
+macro describe(exprs...)
+    :describe |> parse(exprs) |> rewrite
 end
 
+### Filtering columns and rows
 """
-    @tabulate y1 y2 ... [@if condition]
+    @keep y1 y2 ... [@if condition]
 
-Create a frequency table for the variables `y1`, `y2`, etc. in `df`. If `condition` is provided, the operation is executed only on rows for which the condition is true.
+Keep only the variables `y1`, `y2`, etc. in `df`. If `condition` is provided, only the rows for which the condition is true are kept.  
 """
-macro tabulate(exprs...)
-    :tabulate |> parse(exprs) |> rewrite
+macro keep(exprs...)
+    :keep |> parse(exprs) |> rewrite
 end
 
 """
-    @count [@if condition]
+    @drop y1 y2 ... 
+or
+    @drop [@if condition]    
 
-Count the number of rows for which the condition is true. If `condition` is not provided, the total number of rows is counted.
+Drop the variables `y1`, `y2`, etc. from `df`. If `condition` is provided, the rows for which the condition is true are dropped.
 """
-macro count(exprs...)
-    :count |> parse(exprs) |> rewrite
+macro drop(exprs...)
+    :drop |> parse(exprs) |> rewrite
 end
 
+### Modifying the data
 """
-    @sort y1 y2 ... , [desc]
+    @rename oldname newname
 
-Sort the data frame by the variables `y1`, `y2`, etc. By default, the variables are sorted in ascending order. If `desc` is provided, the variables are sorted in descending order
+Rename the variable `oldname` to `newname` in the data frame.
 """
-macro sort(exprs...)
-    :sort |> parse(exprs) |> rewrite
+macro rename(exprs...)
+    :rename |> parse(exprs) |> rewrite
 end
 
 """
-    @order y1 y2 ... , [desc] [last] [after=var] [before=var] [alphabetical]
+    @generate y = expr [@if condition]
 
-Reorder the variables `y1`, `y2`, etc. in the data frame. By default, the variables are ordered in the order they are listed. If `desc` is provided, the variables are ordered in descending order. If `last` is provided, the variables are moved to the end of the data frame. If `after` is provided, the variables are moved after the variable `var`. If `before` is provided, the variables are moved before the variable `var`. If `alphabetical` is provided, the variables are ordered alphabetically.
+Create a new variable `y` in `df` by evaluating `expr`. If `condition` is provided, the operation is executed only on rows for which the condition is true. When the condition is false, the variable will be missing. 
 """
-macro order(exprs...)
-    :order |> parse(exprs) |> rewrite
+macro generate(exprs...)
+    :generate |> parse(exprs) |> rewrite
 end
 
 """
-    @list [y1 y2...] [@if condition]
+    @replace y = expr [@if condition]
 
-Display the entire data frame or the rows for which the condition is true. If variable names are provided, only the variables in the list are displayed.
+Replace the values of `y` in `df` with the result of evaluating `expr`. If `condition` is provided, the operation is executed only on rows for which the condition is true. When the condition is false, the variable will be left unchanged.
 """
-macro list(exprs...)
-    :list |> parse(exprs) |> rewrite
+macro replace(exprs...)
+    :replace |> parse(exprs) |> rewrite
 end
 
-
 """
-    @use "filename.dta", [clear]
+    @mvencode y1 y2 [_all] ... [@if condition], [mv(value)]
 
-Read the data from the file `filename.dta` and set it as the global data frame. If there is already a global data frame, `@use` will throw an error unless the `clear` option is provided
+Encode missing values in the variables `y1`, `y2`, etc. in the data frame. If `condition` is provided, the operation is executed only on rows for which the condition is true. If `mv` is provided, the missing values are encoded with the value `value`. By default value is `missing` making no changes on the dataframe. Using `_all` encodes all variables of the DataFrame.
 """
-macro use(exprs...)
-    command = parse(exprs, :use)
-    length(command.arguments) == 1 || ArgumentError("@use takes a single file name as an argument:\n@use \"filename.dta\"[, clear]") |> throw
-    # clear is the only permissible option
-    isempty(filter(x -> x != :clear, command.options)) || ArgumentError("Invalid options $(string.(command.options)). Correct syntax:\n@use \"filename.dta\"[, clear]") |> throw
-    fname = command.arguments[1]
-    clear = :clear in command.options
-    isnothing(getdf()) || clear || ArgumentError("There is already a global data frame set. If you want to replace it, use the \", clear\" option.") |> throw
-
-    :(println("$(Kezdi.prompt())$($command)\n"); Kezdi.use($fname)) |> esc
+macro mvencode(exprs...)
+    :mvencode |> parse(exprs) |> rewrite
 end
 
 """
-    @save "filename.dta", [replace]
+    @egen y1 = expr1 y2 = expr2 ... [@if condition], [by(group1, group2, ...)]
 
-Save the global data frame to the file `filename.dta`. If the file already exists, the `replace` option must be provided.
+Generate new variables in `df` by evaluating expressions `expr1`, `expr2`, etc. If `condition` is provided, the operation is executed only on rows for which the condition is true. When the condition is false, the variables will be missing. If `by` is provided, the operation is executed by group.
 """
-macro save(exprs...)
-    command = parse(exprs, :save)
-    length(command.arguments) == 1 || ArgumentError("@save takes a single file name as an argument:\n@save \"filename.dta\"") |> throw
-    isnothing(getdf()) && ArgumentError("There is no data frame to save.") |> throw
-    fname = command.arguments[1]
-    replace = :replace in command.options
-    ispath(fname) && !replace && ArgumentError("File $fname already exists.") |> throw
-    :(println("$(Kezdi.prompt())$($command)\n"); Kezdi.save($fname)) |> esc
+macro egen(exprs...)
+    :egen |> parse(exprs) |> rewrite
 end
 
 """
-    @append "filename.dta"
+    @collapse y1 = expr1 y2 = expr2 ... [@if condition], [by(group1, group2, ...)]
 
-Append the data from the file `filename.dta` to the global data frame. Columns that are not common filled with missing values.
+Collapse `df` by evaluating expressions `expr1`, `expr2`, etc. If `condition` is provided, the operation is executed only on rows for which the condition is true. If `by` is provided, the operation is executed by group.
 """
-macro append(exprs...)
-    command = parse(exprs, :append)
-    length(command.arguments) == 1 || ArgumentError("@append takes a single file name as an argument:\n@append \"filename.dta\"") |> throw
-    isnothing(getdf()) && ArgumentError("There is no data frame to append to.") |> throw
-    fname = command.arguments[1]
-    :(println("$(Kezdi.prompt())$($command)\n"); Kezdi.append($fname)) |> esc
+macro collapse(exprs...)
+    :collapse |> parse(exprs) |> rewrite
 end
+
 """
-    @head [n]
+    @sort y1 y2 ... , [desc]
 
-Display the first `n` rows of the data frame. By default, `n` is 5.
+Sort the data frame by the variables `y1`, `y2`, etc. By default, the variables are sorted in ascending order. If `desc` is provided, the variables are sorted in descending order.
 """
-macro head(n=5)
-    :(println("$(Kezdi.prompt())@head $($n)\n"); first(getdf(), $n) |> display_and_return) |> esc
+macro sort(exprs...)
+    :sort |> parse(exprs) |> rewrite
 end
 
 """
-    @tail [n]
+    @order y1 y2 ... , [desc] [last] [after=var] [before=var] [alphabetical]
 
-Display the last `n` rows of the data frame. By default, `n` is 5.
+Reorder the variables `y1`, `y2`, etc. in the data frame. By default, the variables are ordered in the order they are listed. If `desc` is provided, the variables are ordered in descending order. If `last` is provided, the variables are moved to the end of the data frame. If `after` is provided, the variables are moved after the variable `var`. If `before` is provided, the variables are moved before the variable `var`. If `alphabetical` is provided, the variables are ordered alphabetically.
 """
-macro tail(n=5)
-    :(println("$(Kezdi.prompt())@tail $($n)\n"); last(getdf(), $n) |> display_and_return) |> esc
+macro order(exprs...)
+    :order |> parse(exprs) |> rewrite
 end
 
 """
-    @names
+    @reshape long y1 y2 ..., i(varlist) j(var)
+    @reshape wide y1 y2 ..., i(varlist) j(var)
 
-Display the names of the variables in the data frame.
+Reshape the data frame from wide to long or from long to wide format. The variables `y1`, `y2`, etc. are the variables to be reshaped. The `i(var)` and `j(var)` are the variables that define the row and column indices in the reshaped data frame.
+
+The option `i()` may include multiple variables, like `i(var1, var2, var3)`. The option `j()` must include only one variable.
 """
-macro names()
-    :(println("$(Kezdi.prompt())@names\n"); names(getdf()) |> display_and_return) |> esc
+macro reshape(exprs...)
+    if exprs[1] == :long
+        :reshape_long |> parse(exprs[2:end]) |> rewrite
+    elseif exprs[1] == :wide
+        :reshape_wide |> parse(exprs[2:end]) |> rewrite
+    else
+        return quote
+            ArgumentError("Invalid option $(exprs[1]). Correct syntax:\n@reshape long y1 y2 ... i(var) j(var)\n@reshape wide y1 y2 ... i(var) j(var)") |> throw
+        end
+    end
 end
 
 """
-    @rename oldname newname
+    @append "filename.dta" / @append df
 
-Rename the variable `oldname` to `newname` in the data frame.
+Append the data from the file `filename.dta` or from the DataFrame `df` to the global data frame. Columns that are not present in both data frames are filled with missing values.
 """
-macro rename(exprs...)
-    :rename |> parse(exprs) |> rewrite
+macro append(exprs...)
+    command = parse(exprs, :append)
+    length(command.arguments) == 1 || ArgumentError("@append takes a single file name as an argument:\n@append \"filename.dta\"") |> throw
+    isnothing(getdf()) && ArgumentError("There is no data frame to append to.") |> throw
+    fname = command.arguments[1]
+    :(println("$(Kezdi.prompt())$($command)\n"); Kezdi.append($fname)) |> esc
 end
 
+### Summarizing and analyzing data
 """
-    @clear
+    @count [@if condition]
 
-Clears the global dataframe.
+Count the number of rows for which the condition is true. If `condition` is not provided, the total number of rows is counted.
 """
-macro clear()
-    :(println("$(Kezdi.prompt())@clear\n"); setdf(nothing))
+macro count(exprs...)
+    :count |> parse(exprs) |> rewrite
 end
 
 """
-    @describe [y1] [y2]...
+    @tabulate y1 y2 ... [@if condition]
 
-Show the names and data types of columns of the data frame. If no variable names given, all are shown. 
+Create a frequency table for the variables `y1`, `y2`, etc. in `df`. If `condition` is provided, the operation is executed only on rows for which the condition is true.
 """
-macro describe(exprs...)
-    :describe |> parse(exprs) |> rewrite
+macro tabulate(exprs...)
+    :tabulate |> parse(exprs) |> rewrite
 end
 
 """
-    @reshape long y1 y2 ... i(varlist) j(var) 
-    @reshape wide y1 y2 ... i(varlist) j(var)
-
-Reshape the data frame from wide to long or from long to wide format. The variables `y1`, `y2`, etc. are the variables to be reshaped. The `i(var)` and `j(var)` are the variables that define the row and column indices in the reshaped data frame.
+    @summarize y [@if condition]
 
-The option `i()` may include multiple variables, like `i(var1, var2, var3)`. The option `j()` must include only one variable.
+Summarize the variable `y` in `df`. If `condition` is provided, the operation is executed only on rows for which the condition is true.
 """
-macro reshape(exprs...)
-    if exprs[1] == :long
-        :reshape_long |> parse(exprs[2:end]) |> rewrite
-    elseif exprs[1] == :wide
-        :reshape_wide |> parse(exprs[2:end]) |> rewrite
-    else
-        return quote
-            ArgumentError("Invalid option $(exprs[1]). Correct syntax:\n@reshape long y1 y2 ... i(var) j(var)\n@reshape wide y1 y2 ... i(var) j(var)") |> throw
-        end
-    end
+macro summarize(exprs...)
+    :summarize |> parse(exprs) |> rewrite
 end
 
 """
-    @mvencode y1 y2 [_all] ... [if condition], [mv(value)]
+    @regress y x1 x2 ... [@if condition], [robust] [cluster(var1, var2, ...)]
 
-Encode missing values in the variables `y1`, `y2`, etc. in the data frame. If `condition` is provided, the operation is executed only on rows for which the condition is true. If `mv` is provided, the missing values are encoded with the value `value`. By default value is `missing` making no changes on the dataframe. Using `_all` encodes all variables of the DataFrame.
+Estimate a regression model in `df` with dependent variable `y` and independent variables `x1`, `x2`, etc. If `condition` is provided, the operation is executed only on rows for which the condition is true. If `robust` is provided, robust standard errors are calculated. If `cluster` is provided, clustered standard errors are calculated.
+
+The regression is limited to rows in which every variable has a valid value. Missing values, infinity, and NaN are automatically excluded.
 """
-macro mvencode(exprs...)
-    :mvencode |> parse(exprs) |> rewrite
+macro regress(exprs...)
+    :regress |> parse(exprs) |> rewrite
 end

From ec538179bdb73c09455a4dc09e31c673b4a4921d Mon Sep 17 00:00:00 2001
From: Gergely Attila Kiss <corra971407@gmail.com>
Date: Wed, 13 Nov 2024 14:51:57 +0100
Subject: [PATCH 15/15] correct Project.toml

---
 Project.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 93c4555..4f71e0a 100644
--- a/Project.toml
+++ b/Project.toml
@@ -14,7 +14,7 @@ FixedEffectModels = "9d5cd8c9-2029-5cab-9928-427838db53e3"
 FreqTables = "da1fdf0e-e0ff-5433-a45f-9bb5ff651cb1"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
-Missing = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
+Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
 RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
 ReadStatTables = "52522f7a-9570-4e34-8ac6-c005c74d4b84"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
@@ -32,7 +32,7 @@ FixedEffectModels = "1"
 FreqTables = "0.4"
 InteractiveUtils = "1"
 Logging = "1"
-Missing = "1"
+Missings = "1"
 RDatasets = "0.7"
 ReadStatTables = "0.3"
 Reexport = "1"