Skip to content

Commit

Permalink
Merge pull request #198 from codedthinking/reshape
Browse files Browse the repository at this point in the history
Reshape
  • Loading branch information
gergelyattilakiss authored Nov 7, 2024
2 parents 8710708 + ac881ff commit 2408e2e
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/Kezdi.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
Kezdi.jl is a Julia package for data manipulation and analysis. It is inspired by Stata, but it is written in Julia, which makes it faster and more flexible. It is designed to be used in the Julia REPL, but it can also be used in Jupyter notebooks or in scripts.
"""
module Kezdi
export @generate, @replace, @egen, @collapse, @keep, @drop, @summarize, @regress, @use, @tabulate, @count, @sort, @order, @list, @head, @tail, @names, @rename, @clear, @describe, @mvencode, @save, @append

export @generate, @replace, @egen, @collapse, @keep, @drop, @summarize, @regress, @use, @tabulate, @count, @sort, @order, @list, @head, @tail, @names, @rename, @clear, @describe, @mvencode, @save, @append, @reshape

export getdf, setdf, display_and_return, keep_only_values, rowcount, distinct, cond, mvreplace, append

Expand Down
31 changes: 31 additions & 0 deletions src/commands.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,37 @@
# Entry point for code generation. The command's name (`command.command`) is
# lifted into the type domain with `Val`, so multiple dispatch selects the
# matching `rewrite(::Val{...}, command)` method.
function rewrite(command::Command)
    return rewrite(Val(command.command), command)
end

# `@reshape long` has no code generator yet: raise an error rather than
# returning an expression.
rewrite(::Val{:reshape_long}, command::Command) = error("@reshape long not implemented yet")

# Generate the code for `@reshape wide y1 y2 ... i(varlist) j(var)`.
#
# Every variable in the varlist is unstacked separately on the `i()` columns,
# with one new column per value of the `j()` variable, named `<variable><j value>`
# (via `renamecols`). The per-variable results are then inner-joined on the
# `i()` columns, passed through `teardown`, and stored with `setdf`.
#
# Throws `ArgumentError` while the code is being generated when `i()` or `j()`
# is missing, when `j()` names more than one variable, or when no variable to
# reshape is given.
function rewrite(::Val{:reshape_wide}, command::Command)
    gc = generate_command(command; options=[:variables], allowed=[:i, :j])
    (; target_df, setup, teardown, arguments) = gc

    i_option = get_option(command, :i)
    j_option = get_option(command, :j)
    i_option isa Nothing && ArgumentError("i() is mandatory. Syntax is @reshape wide y1 y2 ... i(var) j(var)") |> throw
    j_option isa Nothing && ArgumentError("j() is mandatory. Syntax is @reshape wide y1 y2 ... i(var) j(var)") |> throw
    length(j_option) > 1 && ArgumentError("Only one variable can be specified for j() in @reshape wide") |> throw

    i = i_option |> replace_column_references
    j = j_option[1] |> replace_column_references
    vars = collect(arguments) |> replace_column_references
    # Without this guard an empty varlist would only fail later, inside the
    # generated code, with an index error that says nothing about the syntax.
    length(vars) == 0 && ArgumentError("At least one variable to reshape is required. Syntax is @reshape wide y1 y2 ... i(var) j(var)") |> throw

    # `reduce` returns the single table unchanged when only one variable is
    # given, so one code path serves both the single- and multi-variable case.
    # It also avoids reassigning a variable from inside a `for` loop in the
    # escaped expansion, which is ambiguous under soft-scope rules if the
    # expansion is evaluated at the top level of a script.
    quote
        $setup
        reduce(
            (left, right) -> innerjoin(left, right, on = $i),
            [unstack($target_df, $i, $j, var, renamecols = x -> Symbol(var, x)) for var in $vars],
        ) |> $teardown |> setdf
    end |> esc
end

function rewrite(::Val{:rename}, command::Command)
gc = generate_command(command; options=[:variables], allowed=[])
(; local_copy, target_df, setup, teardown, arguments, options) = gc
Expand Down
17 changes: 17 additions & 0 deletions src/macros.jl
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,23 @@ macro describe(exprs...)
:describe |> parse(exprs) |> rewrite
end

"""
    @reshape long y1 y2 ... i(varlist) j(var)
    @reshape wide y1 y2 ... i(varlist) j(var)

Reshape the data frame from wide to long or from long to wide format. The variables `y1`, `y2`, etc. are the variables to be reshaped. The options `i(varlist)` and `j(var)` name the variables that define the row and column indices of the reshaped data frame.
The option `i()` may include multiple variables, like `i(var1, var2, var3)`. The option `j()` must include only one variable.
Only `@reshape wide` is currently implemented; `@reshape long` raises an error.
"""
macro reshape(exprs...)
    # The first token selects the direction (`long` or `wide`); the remaining
    # tokens are parsed as the command's arguments and options.
    usage = "Correct syntax:\n@reshape long y1 y2 ... i(var) j(var)\n@reshape wide y1 y2 ... i(var) j(var)"
    # A bare `@reshape` would otherwise fail with a BoundsError on `exprs[1]`.
    isempty(exprs) && ArgumentError("Missing reshape direction. $usage") |> throw
    direction = exprs[1]
    if direction == :long
        :reshape_long |> parse(exprs[2:end]) |> rewrite
    elseif direction == :wide
        :reshape_wide |> parse(exprs[2:end]) |> rewrite
    else
        ArgumentError("Invalid option $(direction). $usage") |> throw
    end
end

"""
@mvencode y1 y2 [_all] ... [if condition], [mv(value)]
Expand Down
52 changes: 52 additions & 0 deletions test/commands.jl
Original file line number Diff line number Diff line change
Expand Up @@ -821,6 +821,58 @@ end
df = DataFrame(x=1:10, y=11:20)
@use "test.dta", clear
@test df == getdf()
try @use "test.dta" @if x<5, clear; catch e; @test e isa LoadError; end
end

# Tests for `@reshape wide`: long-format input with identifier column(s) `i`,
# a column-index variable `j`, and value variables `x` and `y`.
@testset "Reshape wide" begin
# Balanced 2x2 panel: every (i, j) combination appears exactly once.
df = DataFrame(i=[1, 1, 2, 2], j=[1, 2, 1, 2], x=1:4, y=5:8)
@testset "Known values" begin
# Two value variables: expect one output column per (variable, j value) pair.
df2 = @with df @reshape wide x y, i(i) j(j)
@test names(df2) == ["i", "x1", "x2", "y1", "y2"]
@test all(df2.x1 .== [1, 3])
@test all(df2.x2 .== [2, 4])
@test all(df2.y1 .== [5, 7])
@test all(df2.y2 .== [6, 8])
# Single value variable.
df2 = @with df @reshape wide x, i(i) j(j)
@test names(df2) == ["i", "x1", "x2"]
@test all(df2.x1 .== [1, 3])
@test all(df2.x2 .== [2, 4])
# Roles swapped: `j` identifies rows and `i` supplies the column suffixes.
df2 = @with df @reshape wide x, i(j) j(i)
@test names(df2) == ["j", "x1", "x2"]
@test all(df2.x1 .== [1, 2])
@test all(df2.x2 .== [3, 4])
end

@testset "Unbalanced panel" begin
# j == 3 is observed only for i == 2, so the reshaped table has gaps.
df = DataFrame(i=[1, 1, 2, 2, 2], j=[1, 2, 1, 2, 3], x=1:5, y=5:9)
df2 = @with df @reshape wide x y, i(i) j(j)
@test names(df2) == ["i", "x1", "x2", "x3", "y1", "y2", "y3"]
@test all(df2.x1 .== [1, 3])
@test all(df2.x2 .== [2, 4])
# `.===` is used where `missing` is expected, because `==` against
# `missing` yields `missing` rather than a Bool.
@test all(df2.x3 .=== [missing, 5])
@test all(df2.y1 .== [5, 7])
@test all(df2.y2 .== [6, 8])
@test all(df2.y3 .=== [missing, 9])
# Roles swapped: the row j == 3 has no observation for i == 1.
df2 = @with df @reshape wide x y, i(j) j(i)
@test names(df2) == ["j", "x1", "x2", "y1", "y2"]
@test all(df2.j .== [1, 2, 3])
@test all(df2.x1 .=== [1, 2, missing])
@test all(df2.x2 .== [3, 4, 5])
@test all(df2.y1 .=== [5, 6, missing])
@test all(df2.y2 .== [7, 8, 9])
end

@testset "Multiple i variables" begin
# Rows are identified by the pair (i1, i2); three distinct pairs occur,
# and two of them are observed for only one value of j.
df = DataFrame(i1=[1, 1, 2, 2], i2=[0, 0, 0, 1], j=[1, 2, 1, 2], x=1:4, y=5:8)
df2 = @with df @reshape wide x y, i(i1, i2) j(j)
@test names(df2) == ["i1", "i2", "x1", "x2", "y1", "y2"]
@test all(df2.i1 .== [1, 2, 2])
@test all(df2.i2 .== [0, 0, 1])
@test all(df2.x1 .=== [1, 3, missing])
@test all(df2.x2 .=== [2, missing, 4])
@test all(df2.y1 .=== [5, 7, missing])
@test all(df2.y2 .=== [6, missing, 8])
end
end

@testset "Save" begin
Expand Down

0 comments on commit 2408e2e

Please sign in to comment.